From 2047650071aafb70324396c359953673020ec60c Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Sat, 20 Jul 2024 14:35:52 +0200 Subject: [PATCH 01/23] core/filtermaps: two dimensional log filter --- core/filtermaps/filtermaps.go | 582 ++++++++++++++++++++++++++++++ core/filtermaps/indexer.go | 618 ++++++++++++++++++++++++++++++++ core/filtermaps/matcher.go | 500 ++++++++++++++++++++++++++ core/rawdb/accessors_indexes.go | 206 +++++++++++ core/rawdb/schema.go | 31 ++ eth/api_backend.go | 2 + eth/backend.go | 13 +- eth/filters/filter.go | 41 ++- eth/filters/filter_system.go | 5 + internal/ethapi/backend.go | 5 + 10 files changed, 1991 insertions(+), 12 deletions(-) create mode 100644 core/filtermaps/filtermaps.go create mode 100644 core/filtermaps/indexer.go create mode 100644 core/filtermaps/matcher.go diff --git a/core/filtermaps/filtermaps.go b/core/filtermaps/filtermaps.go new file mode 100644 index 000000000000..a265696041fa --- /dev/null +++ b/core/filtermaps/filtermaps.go @@ -0,0 +1,582 @@ +package filtermaps + +import ( + "context" + "crypto/sha256" + "encoding/binary" + "errors" + "sort" + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/common/lru" + "github.com/ethereum/go-ethereum/core" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" +) + +const ( + logMapHeight = 12 // log2(mapHeight) + mapHeight = 1 << logMapHeight // filter map height (number of rows) + logMapsPerEpoch = 6 // log2(mapsPerEpoch) + mapsPerEpoch = 1 << logMapsPerEpoch // number of maps in an epoch + logValuesPerMap = 16 // log2(valuesPerMap) + valuesPerMap = 1 << logValuesPerMap // number of log values marked on each filter map + + headCacheSize = 8 // maximum number of recent filter maps cached in memory +) + +// FilterMaps is the in-memory representation of the log index structure that is +// responsible
// for building and updating the index according to the canonical
// chain.
// Note that FilterMaps implements the same data structure as proposed in EIP-7745
// without the tree hashing and consensus changes:
// https://eips.ethereum.org/EIPS/eip-7745
type FilterMaps struct {
	// lock is taken for reading by getRange and the matcher backend methods and
	// for writing by applyUpdateBatch.
	lock sync.RWMutex
	db   ethdb.KeyValueStore
	// closeCh carries a reply channel from Close to the update loop, which
	// closes the reply channel once it has stopped.
	closeCh chan chan struct{}

	filterMapsRange
	chain *core.BlockChain

	// filterMapCache caches certain filter maps (headCacheSize most recent maps
	// and one tail map) that are expected to be frequently accessed and modified
	// while updating the structure. Note that the set of cached maps depends
	// only on filterMapsRange and rows of other maps are not cached here.
	filterMapLock  sync.Mutex
	filterMapCache map[uint32]*filterMap
	blockPtrCache  *lru.Cache[uint32, uint64] // map index -> first block number
	lvPointerCache *lru.Cache[uint64, uint64] // block number -> first log value index
	revertPoints   map[uint64]*revertPoint    // in-memory revert points keyed by block number
}

// filterMap is a full or partial in-memory representation of a filter map where
// rows are allowed to have a nil value meaning the row is not stored in the
// structure. Note that therefore a known empty row should be represented with
// a zero-length slice.
// It can be used as a memory cache or an overlay while preparing a batch of
// changes to the structure. In either case a nil value should be interpreted
// as transparent (uncached/unchanged).
type filterMap [mapHeight]FilterRow

// FilterRow encodes a single row of a filter map as a list of column indices.
// Note that the values are always stored in the same order as they were added
// and if the same column index is added twice, it is also stored twice.
// Order of column indices and potential duplications do not matter when searching
// for a value but leaving the original order makes reverting to a previous state
// simpler.
type FilterRow []uint32

// emptyRow represents an empty FilterRow.
Note that in case of decoded FilterRows +// nil has a special meaning (transparent; not stored in the cache/overlay map) +// and therefore an empty row is represented by a zero length slice. +var emptyRow = FilterRow{} + +// filterMapsRange describes the block range that has been indexed and the log +// value index range it has been mapped to. +type filterMapsRange struct { + initialized bool + headLvPointer, tailLvPointer uint64 + headBlockNumber, tailBlockNumber uint64 + headBlockHash, tailParentHash common.Hash +} + +// NewFilterMaps creates a new FilterMaps and starts the indexer in order to keep +// the structure in sync with the given blockchain. +func NewFilterMaps(db ethdb.KeyValueStore, chain *core.BlockChain) *FilterMaps { + rs, err := rawdb.ReadFilterMapsRange(db) + if err != nil { + log.Error("Error reading log index range", "error", err) + } + fm := &FilterMaps{ + db: db, + chain: chain, + closeCh: make(chan chan struct{}), + filterMapsRange: filterMapsRange{ + initialized: rs.Initialized, + headLvPointer: rs.HeadLvPointer, + tailLvPointer: rs.TailLvPointer, + headBlockNumber: rs.HeadBlockNumber, + tailBlockNumber: rs.TailBlockNumber, + headBlockHash: rs.HeadBlockHash, + tailParentHash: rs.TailParentHash, + }, + filterMapCache: make(map[uint32]*filterMap), + blockPtrCache: lru.NewCache[uint32, uint64](1000), + lvPointerCache: lru.NewCache[uint64, uint64](1000), + revertPoints: make(map[uint64]*revertPoint), + } + if !fm.initialized { + fm.resetDb() + } + fm.updateMapCache() + if rp, err := fm.newUpdateBatch().makeRevertPoint(); err == nil { + fm.revertPoints[rp.blockNumber] = rp + } else { + log.Error("Error creating head revert point", "error", err) + } + go fm.updateLoop() + return fm +} + +// Close ensures that the indexer is fully stopped before returning. +func (f *FilterMaps) Close() { + ch := make(chan struct{}) + f.closeCh <- ch + <-ch +} + +// FilterMapsMatcherBackend implements MatcherBackend. 
+type FilterMapsMatcherBackend FilterMaps + +// GetFilterMapRow returns the given row of the given map. If the row is empty +// then a non-nil zero length row is returned. +// Note that the returned slices should not be modified, they should be copied +// on write. +// GetFilterMapRow implements MatcherBackend. +func (ff *FilterMapsMatcherBackend) GetFilterMapRow(ctx context.Context, mapIndex, rowIndex uint32) (FilterRow, error) { + f := (*FilterMaps)(ff) + return f.getFilterMapRow(mapIndex, rowIndex) +} + +// GetBlockLvPointer returns the starting log value index where the log values +// generated by the given block are located. If blockNumber is beyond the current +// head then the first unoccupied log value index is returned. +// GetBlockLvPointer implements MatcherBackend. +func (ff *FilterMapsMatcherBackend) GetBlockLvPointer(ctx context.Context, blockNumber uint64) (uint64, error) { + f := (*FilterMaps)(ff) + f.lock.RLock() + defer f.lock.RUnlock() + + return f.getBlockLvPointer(blockNumber) +} + +// GetLogByLvIndex returns the log at the given log value index. If the index does +// not point to the first log value entry of a log then no log and no error are +// returned as this can happen when the log value index was a false positive. +// Note that this function assumes that the log index structure is consistent +// with the canonical chain at the point where the given log value index points. +// If this is not the case then an invalid result or an error may be returned. +// GetLogByLvIndex implements MatcherBackend. +func (ff *FilterMapsMatcherBackend) GetLogByLvIndex(ctx context.Context, lvIndex uint64) (*types.Log, error) { + f := (*FilterMaps)(ff) + f.lock.RLock() + defer f.lock.RUnlock() + + return f.getLogByLvIndex(lvIndex) +} + +// reset un-initializes the FilterMaps structure and removes all related data from +// the database. +// Note that this function assumes that the read/write lock is being held. 
+func (f *FilterMaps) reset() { + // deleting the range first ensures that resetDb will be called again at next + // startup and any leftover data will be removed even if it cannot finish now. + rawdb.DeleteFilterMapsRange(f.db) + f.resetDb() + f.filterMapsRange = filterMapsRange{} + f.filterMapCache = make(map[uint32]*filterMap) + f.revertPoints = make(map[uint64]*revertPoint) + f.blockPtrCache.Purge() + f.lvPointerCache.Purge() +} + +// resetDb removes all log index data from the database. +func (f *FilterMaps) resetDb() { + var logged bool + for { + it := f.db.NewIterator(rawdb.FilterMapsPrefix, nil) + batch := f.db.NewBatch() + var count int + for ; count < 10000 && it.Next(); count++ { + batch.Delete(it.Key()) + } + it.Release() + if count == 0 { + break + } + if !logged { + log.Info("Resetting log index database...") + logged = true + } + batch.Write() + } + if logged { + log.Info("Resetting log index database finished") + } +} + +// setRange updates the covered range and also adds the changes to the given batch. +// Note that this function assumes that the read/write lock is being held. +func (f *FilterMaps) setRange(batch ethdb.Batch, newRange filterMapsRange) { + f.filterMapsRange = newRange + rs := rawdb.FilterMapsRange{ + Initialized: newRange.initialized, + HeadLvPointer: newRange.headLvPointer, + TailLvPointer: newRange.tailLvPointer, + HeadBlockNumber: newRange.headBlockNumber, + TailBlockNumber: newRange.tailBlockNumber, + HeadBlockHash: newRange.headBlockHash, + TailParentHash: newRange.tailParentHash, + } + rawdb.WriteFilterMapsRange(batch, rs) + f.updateMapCache() +} + +// updateMapCache updates the maps covered by the filterMapCache according to the +// covered range. +// Note that this function assumes that the read lock is being held. 
+func (f *FilterMaps) updateMapCache() { + if !f.initialized { + return + } + f.filterMapLock.Lock() + defer f.filterMapLock.Unlock() + + newFilterMapCache := make(map[uint32]*filterMap) + firstMap, afterLastMap := uint32(f.tailLvPointer>>logValuesPerMap), uint32((f.headLvPointer+valuesPerMap-1)>>logValuesPerMap) + headCacheFirst := firstMap + 1 + if afterLastMap > headCacheFirst+headCacheSize { + headCacheFirst = afterLastMap - headCacheSize + } + fm := f.filterMapCache[firstMap] + if fm == nil { + fm = new(filterMap) + } + newFilterMapCache[firstMap] = fm + for mapIndex := headCacheFirst; mapIndex < afterLastMap; mapIndex++ { + fm := f.filterMapCache[mapIndex] + if fm == nil { + fm = new(filterMap) + } + newFilterMapCache[mapIndex] = fm + } + f.filterMapCache = newFilterMapCache +} + +// getLogByLvIndex returns the log at the given log value index. If the index does +// not point to the first log value entry of a log then no log and no error are +// returned as this can happen when the log value index was a false positive. +// Note that this function assumes that the log index structure is consistent +// with the canonical chain at the point where the given log value index points. +// If this is not the case then an invalid result or an error may be returned. +// Note that this function assumes that the read lock is being held. 
+func (f *FilterMaps) getLogByLvIndex(lvIndex uint64) (*types.Log, error) { + if lvIndex < f.tailLvPointer || lvIndex > f.headLvPointer { + return nil, errors.New("log value index outside available range") + } + // find possible block range based on map to block pointers + mapIndex := uint32(lvIndex >> logValuesPerMap) + firstBlockNumber, err := f.getMapBlockPtr(mapIndex) + if err != nil { + return nil, err + } + var lastBlockNumber uint64 + if mapIndex+1 < uint32((f.headLvPointer+valuesPerMap-1)>>logValuesPerMap) { + lastBlockNumber, err = f.getMapBlockPtr(mapIndex + 1) + if err != nil { + return nil, err + } + } else { + lastBlockNumber = f.headBlockNumber + } + // find block with binary search based on block to log value index pointers + for firstBlockNumber < lastBlockNumber { + midBlockNumber := (firstBlockNumber + lastBlockNumber + 1) / 2 + midLvPointer, err := f.getBlockLvPointer(midBlockNumber) + if err != nil { + return nil, err + } + if lvIndex < midLvPointer { + lastBlockNumber = midBlockNumber - 1 + } else { + firstBlockNumber = midBlockNumber + } + } + // get block receipts + hash := f.chain.GetCanonicalHash(firstBlockNumber) + receipts := f.chain.GetReceiptsByHash(hash) //TODO small cache + if receipts == nil { + return nil, errors.New("receipts not found") + } + lvPointer, err := f.getBlockLvPointer(firstBlockNumber) + if err != nil { + return nil, err + } + // iterate through receipts to find the exact log starting at lvIndex + for _, receipt := range receipts { + for _, log := range receipt.Logs { + if lvPointer > lvIndex { + // lvIndex does not point to the first log value (address value) + // generated by a log as true matches should always do, so it + // is considered a false positive (no log and no error returned). 
+ return nil, nil + } + if lvPointer == lvIndex { + return log, nil // potential match + } + lvPointer += uint64(len(log.Topics) + 1) + } + } + return nil, errors.New("log value index not found") +} + +// getFilterMapRow returns the given row of the given map. If the row is empty +// then a non-nil zero length row is returned. +// Note that the returned slices should not be modified, they should be copied +// on write. +func (f *FilterMaps) getFilterMapRow(mapIndex, rowIndex uint32) (FilterRow, error) { + f.filterMapLock.Lock() + defer f.filterMapLock.Unlock() + + fm := f.filterMapCache[mapIndex] + if fm != nil && fm[rowIndex] != nil { + return fm[rowIndex], nil + } + row, err := rawdb.ReadFilterMapRow(f.db, mapRowIndex(mapIndex, rowIndex)) + if err != nil { + return nil, err + } + if fm != nil { + fm[rowIndex] = FilterRow(row) + } + return FilterRow(row), nil +} + +// storeFilterMapRow stores a row at the given row index of the given map and also +// caches it in filterMapCache if the given map is cached. +// Note that empty rows are not stored in the database and therefore there is no +// separate delete function; deleting a row is the same as storing an empty row. +func (f *FilterMaps) storeFilterMapRow(batch ethdb.Batch, mapIndex, rowIndex uint32, row FilterRow) { + f.filterMapLock.Lock() + defer f.filterMapLock.Unlock() + + if fm := f.filterMapCache[mapIndex]; fm != nil { + (*fm)[rowIndex] = row + } + rawdb.WriteFilterMapRow(batch, mapRowIndex(mapIndex, rowIndex), []uint32(row)) +} + +// mapRowIndex calculates the unified storage index where the given row of the +// given map is stored. Note that this indexing scheme is the same as the one +// proposed in EIP-7745 for tree-hashing the filter map structure and for the +// same data proximity reasons it is also suitable for database representation. 
+// See also: +// https://eips.ethereum.org/EIPS/eip-7745#hash-tree-structure +func mapRowIndex(mapIndex, rowIndex uint32) uint64 { + epochIndex, mapSubIndex := mapIndex>>logMapsPerEpoch, mapIndex%mapsPerEpoch + return (uint64(epochIndex)< f.headBlockNumber { + return f.headLvPointer, nil + } + if lvPointer, ok := f.lvPointerCache.Get(blockNumber); ok { + return lvPointer, nil + } + lvPointer, err := rawdb.ReadBlockLvPointer(f.db, blockNumber) + if err != nil { + return 0, err + } + f.lvPointerCache.Add(blockNumber, lvPointer) + return lvPointer, nil +} + +// storeBlockLvPointer stores the starting log value index where the log values +// generated by the given block are located. +func (f *FilterMaps) storeBlockLvPointer(batch ethdb.Batch, blockNumber, lvPointer uint64) { + f.lvPointerCache.Add(blockNumber, lvPointer) + rawdb.WriteBlockLvPointer(batch, blockNumber, lvPointer) +} + +// deleteBlockLvPointer deletes the starting log value index where the log values +// generated by the given block are located. +func (f *FilterMaps) deleteBlockLvPointer(batch ethdb.Batch, blockNumber uint64) { + f.lvPointerCache.Remove(blockNumber) + rawdb.DeleteBlockLvPointer(batch, blockNumber) +} + +// getMapBlockPtr returns the number of the block that generated the first log +// value entry of the given map. +func (f *FilterMaps) getMapBlockPtr(mapIndex uint32) (uint64, error) { + if blockPtr, ok := f.blockPtrCache.Get(mapIndex); ok { + return blockPtr, nil + } + blockPtr, err := rawdb.ReadFilterMapBlockPtr(f.db, mapIndex) + if err != nil { + return 0, err + } + f.blockPtrCache.Add(mapIndex, blockPtr) + return blockPtr, nil +} + +// storeMapBlockPtr stores the number of the block that generated the first log +// value entry of the given map. 
+func (f *FilterMaps) storeMapBlockPtr(batch ethdb.Batch, mapIndex uint32, blockPtr uint64) { + f.blockPtrCache.Add(mapIndex, blockPtr) + rawdb.WriteFilterMapBlockPtr(batch, mapIndex, blockPtr) +} + +// deleteMapBlockPtr deletes the number of the block that generated the first log +// value entry of the given map. +func (f *FilterMaps) deleteMapBlockPtr(batch ethdb.Batch, mapIndex uint32) { + f.blockPtrCache.Remove(mapIndex) + rawdb.DeleteFilterMapBlockPtr(batch, mapIndex) +} + +// addressValue returns the log value hash of a log emitting address. +func addressValue(address common.Address) common.Hash { + var result common.Hash + hasher := sha256.New() + hasher.Write(address[:]) + hasher.Sum(result[:0]) + return result +} + +// topicValue returns the log value hash of a log topic. +func topicValue(topic common.Hash) common.Hash { + var result common.Hash + hasher := sha256.New() + hasher.Write(topic[:]) + hasher.Sum(result[:0]) + return result +} + +// rowIndex returns the row index in which the given log value should be marked +// during the given epoch. Note that row assignments are re-shuffled in every +// epoch in order to ensure that even though there are always a few more heavily +// used rows due to very popular addresses and topics, these will not make search +// for other log values very expensive. Even if certain values are occasionally +// sorted into these heavy rows, in most of the epochs they are placed in average +// length rows. +func rowIndex(epochIndex uint32, logValue common.Hash) uint32 { + hasher := sha256.New() + hasher.Write(logValue[:]) + var indexEnc [4]byte + binary.LittleEndian.PutUint32(indexEnc[:], epochIndex) + hasher.Write(indexEnc[:]) + var hash common.Hash + hasher.Sum(hash[:0]) + return binary.LittleEndian.Uint32(hash[:4]) % mapHeight +} + +// columnIndex returns the column index that should be added to the appropriate +// row in order to place a mark for the next log value. 
+func columnIndex(lvIndex uint64, logValue common.Hash) uint32 { + x := uint32(lvIndex % valuesPerMap) // log value sub-index + transformHash := transformHash(uint32(lvIndex/valuesPerMap), logValue) + // apply column index transformation function + x += binary.LittleEndian.Uint32(transformHash[0:4]) + x *= binary.LittleEndian.Uint32(transformHash[4:8])*2 + 1 + x ^= binary.LittleEndian.Uint32(transformHash[8:12]) + x *= binary.LittleEndian.Uint32(transformHash[12:16])*2 + 1 + x += binary.LittleEndian.Uint32(transformHash[16:20]) + x *= binary.LittleEndian.Uint32(transformHash[20:24])*2 + 1 + x ^= binary.LittleEndian.Uint32(transformHash[24:28]) + x *= binary.LittleEndian.Uint32(transformHash[28:32])*2 + 1 + return x +} + +// transformHash calculates a hash specific to a given map and log value hash +// that defines a bijective function on the uint32 range. This function is used +// to transform the log value sub-index (distance from the first index of the map) +// into a 32 bit column index, then applied in reverse when searching for potential +// matches for a given log value. +func transformHash(mapIndex uint32, logValue common.Hash) (result common.Hash) { + hasher := sha256.New() + hasher.Write(logValue[:]) + var indexEnc [4]byte + binary.LittleEndian.PutUint32(indexEnc[:], mapIndex) + hasher.Write(indexEnc[:]) + hasher.Sum(result[:0]) + return +} + +// potentialMatches returns the list of log value indices potentially matching +// the given log value hash in the range of the filter map the row belongs to. +// Note that the list of indices is always sorted and potential duplicates are +// removed. Though the column indices are stored in the same order they were +// added and therefore the true matches are automatically reverse transformed +// in the right order, false positives can ruin this property. 
Since these can +// only be separated from true matches after the combined pattern matching of the +// outputs of individual log value matchers and this pattern matcher assumes a +// sorted and duplicate-free list of indices, we should ensure these properties +// here. +func (row FilterRow) potentialMatches(mapIndex uint32, logValue common.Hash) potentialMatches { + results := make(potentialMatches, 0, 8) + transformHash := transformHash(mapIndex, logValue) + sub1 := binary.LittleEndian.Uint32(transformHash[0:4]) + mul1 := uint32ModInverse(binary.LittleEndian.Uint32(transformHash[4:8])*2 + 1) + xor1 := binary.LittleEndian.Uint32(transformHash[8:12]) + mul2 := uint32ModInverse(binary.LittleEndian.Uint32(transformHash[12:16])*2 + 1) + sub2 := binary.LittleEndian.Uint32(transformHash[16:20]) + mul3 := uint32ModInverse(binary.LittleEndian.Uint32(transformHash[20:24])*2 + 1) + xor2 := binary.LittleEndian.Uint32(transformHash[24:28]) + mul4 := uint32ModInverse(binary.LittleEndian.Uint32(transformHash[28:32])*2 + 1) + // perform reverse column index transformation on all column indices of the row. + // if a column index was added by the searched log value then the reverse + // transform will yield a valid log value sub-index of the given map. + // Column index is 32 bits long while there are 2**16 valid log value indices + // in the map's range, so this can also happen by accident with 1 in 2**16 + // chance, in which case we have a false positive. 
+ for _, columnIndex := range row { + if potentialSubIndex := (((((((columnIndex * mul4) ^ xor2) * mul3) - sub2) * mul2) ^ xor1) * mul1) - sub1; potentialSubIndex < valuesPerMap { + results = append(results, uint64(mapIndex)*valuesPerMap+uint64(potentialSubIndex)) + } + } + sort.Sort(results) + // remove duplicates + j := 0 + for i, match := range results { + if i == 0 || match != results[i-1] { + results[j] = results[i] + j++ + } + } + return results[:j] +} + +// potentialMatches is a strictly monotonically increasing list of log value +// indices in the range of a filter map that are potential matches for certain +// filter criteria. +// Note that nil is used as a wildcard and therefore means that all log value +// indices in the filter map range are potential matches. If there are no +// potential matches in the given map's range then an empty slice should be used. +type potentialMatches []uint64 + +// noMatches means there are no potential matches in a given filter map's range. +var noMatches = potentialMatches{} + +func (p potentialMatches) Len() int { return len(p) } +func (p potentialMatches) Less(i, j int) bool { return p[i] < p[j] } +func (p potentialMatches) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +// uint32ModInverse takes an odd 32 bit number and returns its modular +// multiplicative inverse (mod 2**32), meaning that for any uint32 x and odd y +// x * y * uint32ModInverse(y) == 1. 
+func uint32ModInverse(v uint32) uint32 { + if v&1 == 0 { + panic("uint32ModInverse called with even argument") + } + m := int64(1) << 32 + m0 := m + a := int64(v) + x, y := int64(1), int64(0) + for a > 1 { + q := a / m + m, a = a%m, m + x, y = y, x-q*y + } + if x < 0 { + x += m0 + } + return uint32(x) +} diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go new file mode 100644 index 000000000000..789bf4c4004d --- /dev/null +++ b/core/filtermaps/indexer.go @@ -0,0 +1,618 @@ +package filtermaps + +import ( + "errors" + "math" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/log" +) + +const ( + startLvPointer = valuesPerMap << 31 // log value index assigned to init block + removedPointer = math.MaxUint64 // used in updateBatch to signal removed items + revertPointFrequency = 256 // frequency of revert points in database + cachedRevertPoints = 64 // revert points for most recent blocks in memory +) + +// updateLoop initializes and updates the log index structure according to the +// canonical chain. 
func (f *FilterMaps) updateLoop() {
	headEventCh := make(chan core.ChainHeadEvent)
	sub := f.chain.SubscribeChainHeadEvent(headEventCh)
	defer sub.Unsubscribe()

	// if the chain has no current block yet, block until the first head event
	// arrives or the structure is closed
	head := f.chain.CurrentBlock()
	if head == nil {
		select {
		case ev := <-headEventCh:
			head = ev.Block.Header()
		case ch := <-f.closeCh:
			close(ch)
			return
		}
	}
	fmr := f.getRange()

	// stop is set once a close request has been received and acknowledged
	var stop bool
	// wait blocks until there is a new head to process, 20 seconds have passed
	// or a close request arrives; it returns immediately if already stopped
	wait := func() {
		if stop {
			return
		}
		select {
		case ev := <-headEventCh:
			head = ev.Block.Header()
		case <-time.After(time.Second * 20):
			// keep updating log index during syncing
			head = f.chain.CurrentBlock()
		case ch := <-f.closeCh:
			close(ch)
			stop = true
		}
	}

	for !stop {
		if !fmr.initialized {
			f.tryInit(head)
			fmr = f.getRange()
			if !fmr.initialized {
				wait()
				continue
			}
		}
		// log index is initialized
		if fmr.headBlockHash != head.Hash() {
			f.tryUpdateHead(head)
			fmr = f.getRange()
			if fmr.headBlockHash != head.Hash() {
				wait()
				continue
			}
		}
		// log index head is at latest chain head; process tail blocks if possible
		f.tryExtendTail(func() bool {
			// return true if tail processing needs to be stopped
			select {
			case ev := <-headEventCh:
				head = ev.Block.Header()
			case ch := <-f.closeCh:
				close(ch)
				stop = true
				return true
			default:
				// nothing pending; poll the chain for its current head
				head = f.chain.CurrentBlock()
			}
			// stop if there is a new chain head (always prioritize head updates)
			return fmr.headBlockHash != head.Hash()
		})
		if fmr.headBlockHash == head.Hash() {
			// if tail processing exited while there is no new head then no more
			// tail blocks can be processed
			wait()
		}
	}
}

// getRange returns a copy of the current filterMapsRange, taken under the read
// lock.
func (f *FilterMaps) getRange() filterMapsRange {
	f.lock.RLock()
	defer f.lock.RUnlock()

	return f.filterMapsRange
}

// tryInit attempts to initialize the log index structure.
+func (f *FilterMaps) tryInit(head *types.Header) { + receipts := f.chain.GetReceiptsByHash(head.Hash()) + if receipts == nil { + log.Error("Could not retrieve block receipts for init block", "number", head.Number, "hash", head.Hash()) + return + } + update := f.newUpdateBatch() + if err := update.initWithBlock(head, receipts); err != nil { + log.Error("Could not initialize log index", "error", err) + } + f.applyUpdateBatch(update) +} + +// tryUpdateHead attempts to update the log index with a new head. If necessary, +// it reverts to a common ancestor with the old head before adding new block logs. +// If no suitable revert point is available (probably a reorg just after init) +// then it resets the index and tries to re-initialize with the new head. +func (f *FilterMaps) tryUpdateHead(newHead *types.Header) { + // iterate back from new head until the log index head or a revert point and + // collect headers of blocks to be added + var ( + newHeaders []*types.Header + chainPtr = newHead + rp *revertPoint + ) + for { + if rp == nil || chainPtr.Number.Uint64() < rp.blockNumber { + var err error + rp, err = f.getRevertPoint(chainPtr.Number.Uint64()) + if err != nil { + log.Error("Error fetching revert point", "block number", chainPtr.Number.Uint64(), "error", err) + return + } + if rp == nil { + // there are no more revert points available so we should reset and re-initialize + log.Warn("No suitable revert point exists; re-initializing log index", "block number", newHead.Number.Uint64()) + f.reset() + f.tryInit(newHead) + return + } + } + if chainPtr.Hash() == rp.blockHash { + // revert point found at an ancestor of the new head + break + } + // keep iterating backwards and collecting headers + newHeaders = append(newHeaders, chainPtr) + chainPtr = f.chain.GetHeader(chainPtr.ParentHash, chainPtr.Number.Uint64()-1) + if chainPtr == nil { + log.Error("Canonical header not found", "number", chainPtr.Number.Uint64()-1, "hash", chainPtr.ParentHash) + return + } + } + if 
rp.blockHash != f.headBlockHash { + if rp.blockNumber+128 <= f.headBlockNumber { + log.Warn("Rolling back log index", "old head", f.headBlockNumber, "new head", chainPtr.Number.Uint64()) + } + if err := f.revertTo(rp); err != nil { + log.Error("Error applying revert point", "block number", chainPtr.Number.Uint64(), "error", err) + return + } + } + + if newHeaders == nil { + return + } + // add logs of new blocks in reverse order + update := f.newUpdateBatch() + for i := len(newHeaders) - 1; i >= 0; i-- { + newHeader := newHeaders[i] + receipts := f.chain.GetReceiptsByHash(newHeader.Hash()) + if receipts == nil { + log.Error("Could not retrieve block receipts for new block", "number", newHeader.Number, "hash", newHeader.Hash()) + break + } + if err := update.addBlockToHead(newHeader, receipts); err != nil { + log.Error("Error adding new block", "number", newHeader.Number, "hash", newHeader.Hash(), "error", err) + break + } + if update.updatedRangeLength() >= mapsPerEpoch { + // limit the amount of data updated in a single batch + f.applyUpdateBatch(update) + update = f.newUpdateBatch() + } + } + f.applyUpdateBatch(update) +} + +// tryExtendTail attempts to extend the log index backwards until it indexes the +// genesis block or cannot find more block receipts. Since this is a long process, +// stopFn is called after adding each tail block and if it returns true, the +// latest batch is written and the function returns. 
+func (f *FilterMaps) tryExtendTail(stopFn func() bool) { + fmr := f.getRange() + number, parentHash := fmr.tailBlockNumber, fmr.tailParentHash + if number == 0 { + return + } + update := f.newUpdateBatch() + lastTailEpoch := update.tailEpoch() + for number > 0 && !stopFn() { + if tailEpoch := update.tailEpoch(); tailEpoch < lastTailEpoch { + // limit the amount of data updated in a single batch + f.applyUpdateBatch(update) + update = f.newUpdateBatch() + lastTailEpoch = tailEpoch + } + newTail := f.chain.GetHeader(parentHash, number-1) + if newTail == nil { + log.Error("Tail header not found", "number", number-1, "hash", parentHash) + break + } + receipts := f.chain.GetReceiptsByHash(newTail.Hash()) + if receipts == nil { + log.Error("Could not retrieve block receipts for tail block", "number", newTail.Number, "hash", newTail.Hash()) + break + } + if err := update.addBlockToTail(newTail, receipts); err != nil { + log.Error("Error adding tail block", "number", newTail.Number, "hash", newTail.Hash(), "error", err) + break + } + number, parentHash = newTail.Number.Uint64(), newTail.ParentHash + } + f.applyUpdateBatch(update) +} + +// updateBatch is a memory overlay collecting changes to the index log structure +// that can be written to the database in a single batch while the in-memory +// representations in FilterMaps are also updated. +type updateBatch struct { + filterMapsRange + maps map[uint32]*filterMap // nil rows are unchanged + getFilterMapRow func(mapIndex, rowIndex uint32) (FilterRow, error) + blockLvPointer map[uint64]uint64 // removedPointer means delete + mapBlockPtr map[uint32]uint64 // removedPointer means delete + revertPoints map[uint64]*revertPoint + firstMap, afterLastMap uint32 +} + +// newUpdateBatch creates a new updateBatch. 
func (f *FilterMaps) newUpdateBatch() *updateBatch {
	f.lock.RLock()
	defer f.lock.RUnlock()

	// the batch starts from a snapshot of the current range and reads
	// unmodified rows through f.getFilterMapRow
	return &updateBatch{
		filterMapsRange: f.filterMapsRange,
		maps:            make(map[uint32]*filterMap),
		getFilterMapRow: f.getFilterMapRow,
		blockLvPointer:  make(map[uint64]uint64),
		mapBlockPtr:     make(map[uint32]uint64),
		revertPoints:    make(map[uint64]*revertPoint),
	}
}

// applyUpdateBatch creates a database batch, writes all changes collected in u
// to the database and also updates the in-memory representations of log index
// data. A failed batch write is logged with log.Crit.
func (f *FilterMaps) applyUpdateBatch(u *updateBatch) {
	f.lock.Lock()
	defer f.lock.Unlock()

	batch := f.db.NewBatch()
	// write or remove block to log value index pointers
	for blockNumber, lvPointer := range u.blockLvPointer {
		if lvPointer != removedPointer {
			f.storeBlockLvPointer(batch, blockNumber, lvPointer)
		} else {
			f.deleteBlockLvPointer(batch, blockNumber)
		}
	}
	// write or remove filter map to block number pointers
	for mapIndex, blockNumber := range u.mapBlockPtr {
		if blockNumber != removedPointer {
			f.storeMapBlockPtr(batch, mapIndex, blockNumber)
		} else {
			f.deleteMapBlockPtr(batch, mapIndex)
		}
	}
	// write filter map rows
	for rowIndex := uint32(0); rowIndex < mapHeight; rowIndex++ {
		for mapIndex := u.firstMap; mapIndex < u.afterLastMap; mapIndex++ {
			if fm := u.maps[mapIndex]; fm != nil {
				if row := (*fm)[rowIndex]; row != nil {
					f.storeFilterMapRow(batch, mapIndex, rowIndex, row)
				}
			}
		}
	}
	// delete removed revert points from the database
	if u.headBlockNumber < f.headBlockNumber {
		for b := u.headBlockNumber + 1; b <= f.headBlockNumber; b++ {
			delete(f.revertPoints, b)
			if b%revertPointFrequency == 0 {
				rawdb.DeleteRevertPoint(batch, b)
			}
		}
	}
	// delete removed revert points from the memory cache
	if u.headBlockNumber > f.headBlockNumber {
		for b := f.headBlockNumber + 1; b <= u.headBlockNumber; b++ {
			delete(f.revertPoints, b-cachedRevertPoints)
		}
	}
	// store new revert points in database and/or memory
	for b, rp := range u.revertPoints {
		if b+cachedRevertPoints > u.headBlockNumber {
			f.revertPoints[b] = rp
		}
		if b%revertPointFrequency == 0 {
			rawdb.WriteRevertPoint(batch, b, &rawdb.RevertPoint{
				BlockHash: rp.blockHash,
				MapIndex:  rp.mapIndex,
				RowLength: rp.rowLength[:],
			})
		}
	}
	// update filterMapsRange
	f.setRange(batch, u.filterMapsRange)
	if err := batch.Write(); err != nil {
		log.Crit("Could not write update batch", "error", err)
	}
	log.Info("Log index block range updated", "tail", u.tailBlockNumber, "head", u.headBlockNumber, "log values", u.headLvPointer-u.tailLvPointer)
}

// updatedRangeLength returns the length of the updated filter map range.
func (u *updateBatch) updatedRangeLength() uint32 {
	return u.afterLastMap - u.firstMap
}

// tailEpoch returns the tail epoch index.
func (u *updateBatch) tailEpoch() uint32 {
	return uint32(u.tailLvPointer >> (logValuesPerMap + logMapsPerEpoch))
}

// getRowPtr returns a pointer to a FilterRow that can be modified. If the batch
// did not have a modified version of the given row yet, it is retrieved using the
// request function from the backing FilterMaps cache or database and copied
// before modification.
func (u *updateBatch) getRowPtr(mapIndex, rowIndex uint32) (*FilterRow, error) {
	fm := u.maps[mapIndex]
	if fm == nil {
		fm = new(filterMap)
		u.maps[mapIndex] = fm
		// extend the [firstMap, afterLastMap) range touched by this batch;
		// afterLastMap == 0 means no map has been touched yet
		if mapIndex < u.firstMap || u.afterLastMap == 0 {
			u.firstMap = mapIndex
		}
		if mapIndex >= u.afterLastMap {
			u.afterLastMap = mapIndex + 1
		}
	}
	rowPtr := &(*fm)[rowIndex]
	if *rowPtr == nil {
		if filterRow, err := u.getFilterMapRow(mapIndex, rowIndex); err == nil {
			// filterRow is read only, copy before write
			*rowPtr = make(FilterRow, len(filterRow), len(filterRow)+8)
			copy(*rowPtr, filterRow)
		} else {
			return nil, err
		}
	}
	return rowPtr, nil
}

// initWithBlock initializes the log index with the given block as head.
+func (u *updateBatch) initWithBlock(header *types.Header, receipts types.Receipts) error { + if u.initialized { + return errors.New("already initialized") + } + u.initialized = true + u.headLvPointer, u.tailLvPointer = startLvPointer, startLvPointer + u.headBlockNumber, u.tailBlockNumber = header.Number.Uint64()-1, header.Number.Uint64() //TODO genesis? + u.headBlockHash, u.tailParentHash = header.ParentHash, header.ParentHash + u.addBlockToHead(header, receipts) + return nil +} + +// addValueToHead adds a single log value to the head of the log index. +func (u *updateBatch) addValueToHead(logValue common.Hash) error { + mapIndex := uint32(u.headLvPointer >> logValuesPerMap) + rowPtr, err := u.getRowPtr(mapIndex, rowIndex(mapIndex>>logMapsPerEpoch, logValue)) + if err != nil { + return err + } + column := columnIndex(u.headLvPointer, logValue) + *rowPtr = append(*rowPtr, column) + u.headLvPointer++ + return nil +} + +// addBlockToHead adds the logs of the given block to the head of the log index. +// It also adds block to log value index and filter map to block pointers and +// a new revert point. 
+func (u *updateBatch) addBlockToHead(header *types.Header, receipts types.Receipts) error { + if !u.initialized { + return errors.New("not initialized") + } + if header.ParentHash != u.headBlockHash { + return errors.New("addBlockToHead parent mismatch") + } + number := header.Number.Uint64() + u.blockLvPointer[number] = u.headLvPointer + startMap := uint32((u.headLvPointer + valuesPerMap - 1) >> logValuesPerMap) + if err := iterateReceipts(receipts, u.addValueToHead); err != nil { + return err + } + stopMap := uint32((u.headLvPointer + valuesPerMap - 1) >> logValuesPerMap) + for m := startMap; m < stopMap; m++ { + u.mapBlockPtr[m] = number + } + u.headBlockNumber, u.headBlockHash = number, header.Hash() + if (u.headBlockNumber-cachedRevertPoints)%revertPointFrequency != 0 { + delete(u.revertPoints, u.headBlockNumber-cachedRevertPoints) + } + if rp, err := u.makeRevertPoint(); err != nil { + return err + } else if rp != nil { + u.revertPoints[u.headBlockNumber] = rp + } + return nil +} + +// addValueToTail adds a single log value to the tail of the log index. +func (u *updateBatch) addValueToTail(logValue common.Hash) error { + if u.tailLvPointer == 0 { + return errors.New("tail log value pointer underflow") + } + u.tailLvPointer-- + mapIndex := uint32(u.tailLvPointer >> logValuesPerMap) + rowPtr, err := u.getRowPtr(mapIndex, rowIndex(mapIndex>>logMapsPerEpoch, logValue)) + if err != nil { + return err + } + column := columnIndex(u.tailLvPointer, logValue) + *rowPtr = append(*rowPtr, 0) + copy((*rowPtr)[1:], (*rowPtr)[:len(*rowPtr)-1]) + (*rowPtr)[0] = column + return nil +} + +// addBlockToTail adds the logs of the given block to the tail of the log index. +// It also adds block to log value index and filter map to block pointers. 
+func (u *updateBatch) addBlockToTail(header *types.Header, receipts types.Receipts) error { + if !u.initialized { + return errors.New("not initialized") + } + if header.Hash() != u.tailParentHash { + return errors.New("addBlockToTail parent mismatch") + } + number := header.Number.Uint64() + stopMap := uint32((u.tailLvPointer + valuesPerMap - 1) >> logValuesPerMap) + var cnt int + if err := iterateReceiptsReverse(receipts, func(lv common.Hash) error { + cnt++ + return u.addValueToTail(lv) + }); err != nil { + return err + } + startMap := uint32(u.tailLvPointer >> logValuesPerMap) + for m := startMap; m < stopMap; m++ { + u.mapBlockPtr[m] = number + } + u.blockLvPointer[number] = u.tailLvPointer + u.tailBlockNumber, u.tailParentHash = number, header.ParentHash + return nil +} + +// iterateReceipts iterates the given block receipts, generates log value hashes +// and passes them to the given callback function as a parameter. +func iterateReceipts(receipts types.Receipts, valueCb func(common.Hash) error) error { + for _, receipt := range receipts { + for _, log := range receipt.Logs { + if err := valueCb(addressValue(log.Address)); err != nil { + return err + } + for _, topic := range log.Topics { + if err := valueCb(topicValue(topic)); err != nil { + return err + } + } + } + } + return nil +} + +// iterateReceiptsReverse iterates the given block receipts, generates log value +// hashes in reverse order and passes them to the given callback function as a +// parameter. 
+func iterateReceiptsReverse(receipts types.Receipts, valueCb func(common.Hash) error) error { + for i := len(receipts) - 1; i >= 0; i-- { + logs := receipts[i].Logs + for j := len(logs) - 1; j >= 0; j-- { + log := logs[j] + for k := len(log.Topics) - 1; k >= 0; k-- { + if err := valueCb(topicValue(log.Topics[k])); err != nil { + return err + } + } + if err := valueCb(addressValue(log.Address)); err != nil { + return err + } + } + } + return nil +} + +// revertPoint can be used to revert the log index to a certain head block. +type revertPoint struct { + blockNumber uint64 + blockHash common.Hash + mapIndex uint32 + rowLength [mapHeight]uint +} + +// makeRevertPoint creates a new revertPoint. +func (u *updateBatch) makeRevertPoint() (*revertPoint, error) { + rp := &revertPoint{ + blockNumber: u.headBlockNumber, + blockHash: u.headBlockHash, + mapIndex: uint32(u.headLvPointer >> logValuesPerMap), + } + if u.tailLvPointer > uint64(rp.mapIndex)< f.headBlockNumber { + blockNumber = f.headBlockNumber + } + if rp := f.revertPoints[blockNumber]; rp != nil { + return rp, nil + } + blockNumber -= blockNumber % revertPointFrequency + rps, err := rawdb.ReadRevertPoint(f.db, blockNumber) + if err != nil { + return nil, err + } + if rps == nil { + return nil, nil + } + if len(rps.RowLength) != mapHeight { + return nil, errors.New("invalid number of rows in stored revert point") + } + rp := &revertPoint{ + blockNumber: blockNumber, + blockHash: rps.BlockHash, + mapIndex: rps.MapIndex, + } + copy(rp.rowLength[:], rps.RowLength) + return rp, nil +} + +// revertTo reverts the log index to the given revert point. 
+func (f *FilterMaps) revertTo(rp *revertPoint) error { + batch := f.db.NewBatch() + afterLastMap := uint32((f.headLvPointer + valuesPerMap - 1) >> logValuesPerMap) + if rp.mapIndex >= afterLastMap { + return errors.New("cannot revert (head map behind revert point)") + } + lvPointer := uint64(rp.mapIndex) << logValuesPerMap + for rowIndex, rowLen := range rp.rowLength[:] { + rowIndex := uint32(rowIndex) + row, err := f.getFilterMapRow(rp.mapIndex, rowIndex) + if err != nil { + return err + } + if uint(len(row)) < rowLen { + return errors.New("cannot revert (row too short)") + } + if uint(len(row)) > rowLen { + f.storeFilterMapRow(batch, rp.mapIndex, rowIndex, row[:rowLen]) + } + for mapIndex := rp.mapIndex + 1; mapIndex < afterLastMap; mapIndex++ { + f.storeFilterMapRow(batch, mapIndex, rowIndex, emptyRow) + } + lvPointer += uint64(rowLen) + } + for mapIndex := rp.mapIndex + 1; mapIndex < afterLastMap; mapIndex++ { + f.deleteMapBlockPtr(batch, mapIndex) + } + for blockNumber := rp.blockNumber + 1; blockNumber <= f.headBlockNumber; blockNumber++ { + f.deleteBlockLvPointer(batch, blockNumber) + } + newRange := f.filterMapsRange + newRange.headLvPointer = lvPointer + newRange.headBlockNumber = rp.blockNumber + newRange.headBlockHash = rp.blockHash + f.setRange(batch, newRange) + if err := batch.Write(); err != nil { + log.Crit("Could not write update batch", "error", err) + } + return nil +} diff --git a/core/filtermaps/matcher.go b/core/filtermaps/matcher.go new file mode 100644 index 000000000000..3bc08494e19a --- /dev/null +++ b/core/filtermaps/matcher.go @@ -0,0 +1,500 @@ +package filtermaps + +import ( + "context" + "math" + "sync" + "sync/atomic" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" +) + +// MatcherBackend defines the functions required for searching in the log index +// data structure. 
It is currently implemented by FilterMapsMatcherBackend but +// once EIP-7745 is implemented and active, these functions can also be trustlessly +// served by a remote prover. +type MatcherBackend interface { + GetBlockLvPointer(ctx context.Context, blockNumber uint64) (uint64, error) + GetFilterMapRow(ctx context.Context, mapIndex, rowIndex uint32) (FilterRow, error) + GetLogByLvIndex(ctx context.Context, lvIndex uint64) (*types.Log, error) +} + +// GetPotentialMatches returns a list of logs that are potential matches for the +// given filter criteria. Note that the returned list may still contain false +// positives. +//TODO add protection against reorgs during search +func GetPotentialMatches(ctx context.Context, backend MatcherBackend, firstBlock, lastBlock uint64, addresses []common.Address, topics [][]common.Hash) ([]*types.Log, error) { + // find the log value index range to search + firstIndex, err := backend.GetBlockLvPointer(ctx, firstBlock) + if err != nil { + return nil, err + } + lastIndex, err := backend.GetBlockLvPointer(ctx, lastBlock+1) + if err != nil { + return nil, err + } + if lastIndex > 0 { + lastIndex-- + } + firstMap, lastMap := uint32(firstIndex>>logValuesPerMap), uint32(lastIndex>>logValuesPerMap) + firstEpoch, lastEpoch := firstMap>>logMapsPerEpoch, lastMap>>logMapsPerEpoch + + // build matcher according to the given filter criteria + matchers := make([]matcher, len(topics)+1) + // matchAddress signals a match when there is a match for any of the given + // addresses. + // If the list of addresses is empty then it creates a "wild card" matcher + // that signals every index as a potential match. 
+ matchAddress := make(matchAny, len(addresses)) + for i, address := range addresses { + matchAddress[i] = &singleMatcher{backend: backend, value: addressValue(address)} + } + matchers[0] = matchAddress + for i, topicList := range topics { + // matchTopic signals a match when there is a match for any of the topics + // specified for the given position (topicList). + // If topicList is empty then it creates a "wild card" matcher that signals + // every index as a potential match. + matchTopic := make(matchAny, len(topicList)) + for j, topic := range topicList { + matchTopic[j] = &singleMatcher{backend: backend, value: topicValue(topic)} + } + matchers[i+1] = matchTopic + } + // matcher is the final sequence matcher that signals a match when all underlying + // matchers signal a match for consecutive log value indices. + matcher := newMatchSequence(matchers) + + // processEpoch returns the potentially matching logs from the given epoch. + processEpoch := func(epochIndex uint32) ([]*types.Log, error) { + var logs []*types.Log + // create a list of map indices to process + fm, lm := epochIndex< lastMap { + lm = lastMap + } + // + mapIndices := make([]uint32, lm+1-fm) + for i := range mapIndices { + mapIndices[i] = fm + uint32(i) + } + // find potential matches + matches, err := matcher.getMatches(ctx, mapIndices) + if err != nil { + return logs, err + } + // get the actual logs located at the matching log value indices + for _, m := range matches { + mlogs, err := getLogsFromMatches(ctx, backend, firstIndex, lastIndex, m) + if err != nil { + return logs, err + } + logs = append(logs, mlogs...) 
+ } + return logs, nil + } + + type task struct { + epochIndex uint32 + logs []*types.Log + err error + done chan struct{} + } + + taskCh := make(chan *task) + var wg sync.WaitGroup + defer func() { + close(taskCh) + wg.Wait() + }() + + worker := func() { + for task := range taskCh { + if task == nil { + break + } + task.logs, task.err = processEpoch(task.epochIndex) + close(task.done) + } + wg.Done() + return + } + + for i := 0; i < 4; i++ { + wg.Add(1) + go worker() + } + + var logs []*types.Log + // startEpoch is the next task to send whenever a worker can accept it. + // waitEpoch is the next task we are waiting for to finish in order to append + // results in the correct order. + startEpoch, waitEpoch := firstEpoch, firstEpoch + tasks := make(map[uint32]*task) + tasks[startEpoch] = &task{epochIndex: startEpoch, done: make(chan struct{})} + for waitEpoch <= lastEpoch { + select { + case taskCh <- tasks[startEpoch]: + startEpoch++ + if startEpoch <= lastEpoch { + if tasks[startEpoch] == nil { + tasks[startEpoch] = &task{epochIndex: startEpoch, done: make(chan struct{})} + } + } + case <-tasks[waitEpoch].done: + logs = append(logs, tasks[waitEpoch].logs...) + if err := tasks[waitEpoch].err; err != nil { + return logs, err + } + delete(tasks, waitEpoch) + waitEpoch++ + if waitEpoch <= lastEpoch { + if tasks[waitEpoch] == nil { + tasks[waitEpoch] = &task{epochIndex: waitEpoch, done: make(chan struct{})} + } + } + } + } + return logs, nil +} + +// getLogsFromMatches returns the list of potentially matching logs located at +// the given list of matching log indices. Matches outside the firstIndex to +// lastIndex range are not returned. 
+func getLogsFromMatches(ctx context.Context, backend MatcherBackend, firstIndex, lastIndex uint64, matches potentialMatches) ([]*types.Log, error) { + var logs []*types.Log + for _, match := range matches { + if match < firstIndex || match > lastIndex { + continue + } + log, err := backend.GetLogByLvIndex(ctx, match) + if err != nil { + return logs, err + } + if log != nil { + logs = append(logs, log) + } + } + return logs, nil +} + +// matcher interface is defined so that individual address/topic matchers can be +// combined into a pattern matcher (see matchAny and matchSequence). +type matcher interface { + // getMatches takes a list of map indices and returns an equal number of + // potentialMatches, one for each corresponding map index. + // Note that the map index list is typically a list of the potentially + // interesting maps from an epoch, plus sometimes the first map of the next + // epoch if it is required for sequence matching. + getMatches(ctx context.Context, mapIndices []uint32) ([]potentialMatches, error) +} + +// singleMatcher implements matcher by returning matches for a single log value hash. +type singleMatcher struct { + backend MatcherBackend + value common.Hash +} + +// getMatches implements matcher +func (s *singleMatcher) getMatches(ctx context.Context, mapIndices []uint32) ([]potentialMatches, error) { + results := make([]potentialMatches, len(mapIndices)) + for i, mapIndex := range mapIndices { + filterRow, err := s.backend.GetFilterMapRow(ctx, mapIndex, rowIndex(mapIndex>>logMapsPerEpoch, s.value)) + if err != nil { + return nil, err + } + results[i] = filterRow.potentialMatches(mapIndex, s.value) + } + return results, nil +} + +// matchAny combinines a set of matchers and returns a match for every position +// where any of the underlying matchers signaled a match. A zero-length matchAny +// acts as a "wild card" that signals a potential match at every position. 
+type matchAny []matcher + +// getMatches implements matcher +func (m matchAny) getMatches(ctx context.Context, mapIndices []uint32) ([]potentialMatches, error) { + if len(m) == 0 { + // return "wild card" results (potentialMatches(nil) is interpreted as a + // potential match at every log value index of the map). + return make([]potentialMatches, len(mapIndices)), nil + } + if len(m) == 1 { + return m[0].getMatches(ctx, mapIndices) + } + matches := make([][]potentialMatches, len(m)) + for i, matcher := range m { + var err error + if matches[i], err = matcher.getMatches(ctx, mapIndices); err != nil { + return nil, err + } + } + results := make([]potentialMatches, len(mapIndices)) + merge := make([]potentialMatches, len(m)) + for i := range results { + for j := range merge { + merge[j] = matches[j][i] + } + results[i] = mergeResults(merge) + } + return results, nil +} + +// mergeResults merges multiple lists of matches into a single one, preserving +// ascending order and filtering out any duplicates. +func mergeResults(results []potentialMatches) potentialMatches { + if len(results) == 0 { + return nil + } + var sumLen int + for _, res := range results { + if res == nil { + // nil is a wild card; all indices in map range are potential matches + return nil + } + sumLen += len(res) + } + merged := make(potentialMatches, 0, sumLen) + for { + best := -1 + for i, res := range results { + if len(res) == 0 { + continue + } + if best < 0 || res[0] < results[best][0] { + best = i + } + } + if best < 0 { + return merged + } + if len(merged) == 0 || results[best][0] > merged[len(merged)-1] { + merged = append(merged, results[best][0]) + } + results[best] = results[best][1:] + } +} + +// matchSequence combines two matchers, a "base" and a "next" matcher with a +// positive integer offset so that the resulting matcher signals a match at log +// value index X when the base matcher returns a match at X and the next matcher +// gives a match at X+offset. 
Note that matchSequence can be used recursively to +// detect any log value sequence. +type matchSequence struct { + base, next matcher + offset uint64 + // *EmptyRate == totalCount << 32 + emptyCount (atomically accessed) + baseEmptyRate, nextEmptyRate uint64 +} + +// newMatchSequence creates a recursive sequence matcher from a list of underlying +// matchers. The resulting matcher signals a match at log value index X when each +// underlying matcher matchers[i] returns a match at X+i. +func newMatchSequence(matchers []matcher) matcher { + if len(matchers) == 0 { + panic("zero length sequence matchers are not allowed") + } + if len(matchers) == 1 { + return matchers[0] + } + return &matchSequence{ + base: newMatchSequence(matchers[:len(matchers)-1]), + next: matchers[len(matchers)-1], + offset: uint64(len(matchers) - 1), + } +} + +// getMatches implements matcher +func (m *matchSequence) getMatches(ctx context.Context, mapIndices []uint32) ([]potentialMatches, error) { + // decide whether to evaluate base or next matcher first + baseEmptyRate := atomic.LoadUint64(&m.baseEmptyRate) + nextEmptyRate := atomic.LoadUint64(&m.nextEmptyRate) + baseTotal, baseEmpty := baseEmptyRate>>32, uint64(uint32(baseEmptyRate)) + nextTotal, nextEmpty := nextEmptyRate>>32, uint64(uint32(nextEmptyRate)) + baseFirst := baseEmpty*nextTotal >= nextEmpty*baseTotal/2 + + var ( + baseRes, nextRes []potentialMatches + baseIndices []uint32 + ) + if baseFirst { + // base first mode; request base matcher + baseIndices = mapIndices + var err error + baseRes, err = m.base.getMatches(ctx, baseIndices) + if err != nil { + return nil, err + } + } + + // determine set of indices to request from next matcher + nextIndices := make([]uint32, 0, len(mapIndices)*3/2) + lastAdded := uint32(math.MaxUint32) + for i, mapIndex := range mapIndices { + if baseFirst && baseRes[i] != nil && len(baseRes[i]) == 0 { + // do not request map index from next matcher if no results from base matcher + continue + } + if 
lastAdded != mapIndex { + nextIndices = append(nextIndices, mapIndex) + lastAdded = mapIndex + } + if !baseFirst || baseRes[i] == nil || baseRes[i][len(baseRes[i])-1] >= (uint64(mapIndex+1)<= len(nextIndices) { + break + } + if nextIndices[nextPtr] != mapIndex || nextIndices[nextPtr+1] != mapIndex+1 { + panic("invalid nextIndices") + } + next1, next2 := nextRes[nextPtr], nextRes[nextPtr+1] + if next1 == nil || (len(next1) > 0 && next1[len(next1)-1] >= (uint64(mapIndex)< 0 && next2[0] < (uint64(mapIndex+1)< 0 { + // discard items from nextRes whose corresponding base matcher results + // with the negative offset applied would be located at mapIndex-1. + start := 0 + for start < len(nextRes) && nextRes[start] < uint64(mapIndex)< 0 { + // discard items from nextNextRes whose corresponding base matcher results + // with the negative offset applied would still be located at mapIndex+1. + stop := 0 + for stop < len(nextNextRes) && nextNextRes[stop] < uint64(mapIndex+1)< 0 && len(baseRes) > 0 { + if nextRes[0] > baseRes[0]+offset { + baseRes = baseRes[1:] + } else if nextRes[0] < baseRes[0]+offset { + nextRes = nextRes[1:] + } else { + matchedRes = append(matchedRes, baseRes[0]) + baseRes = baseRes[1:] + nextRes = nextRes[1:] + } + } + } else { + // baseRes is a wild card so just return next matcher results with + // negative offset. 
+ for len(nextRes) > 0 { + matchedRes = append(matchedRes, nextRes[0]-offset) + nextRes = nextRes[1:] + } + } + } + return matchedRes +} diff --git a/core/rawdb/accessors_indexes.go b/core/rawdb/accessors_indexes.go index 4f2ef0a88083..d1b0cf5053a6 100644 --- a/core/rawdb/accessors_indexes.go +++ b/core/rawdb/accessors_indexes.go @@ -18,6 +18,8 @@ package rawdb import ( "bytes" + "encoding/binary" + "errors" "math/big" "github.com/ethereum/go-ethereum/common" @@ -179,3 +181,207 @@ func DeleteBloombits(db ethdb.Database, bit uint, from uint64, to uint64) { log.Crit("Failed to delete bloom bits", "err", it.Error()) } } + +var emptyRow = []uint32{} + +// ReadFilterMapRow retrieves a filter map row at the given mapRowIndex +// (see filtermaps.mapRowIndex for the storage index encoding). +// Note that zero length rows are not stored in the database and therefore all +// non-existent entries are interpreted as empty rows and return no error. +// Also note that the mapRowIndex indexing scheme is the same as the one +// proposed in EIP-7745 for tree-hashing the filter map structure and for the +// same data proximity reasons it is also suitable for database representation. +// See also: +// https://eips.ethereum.org/EIPS/eip-7745#hash-tree-structure +func ReadFilterMapRow(db ethdb.KeyValueReader, mapRowIndex uint64) ([]uint32, error) { + key := filterMapRowKey(mapRowIndex) + has, err := db.Has(key) + if err != nil { + return nil, err + } + if !has { + return emptyRow, nil + } + encRow, err := db.Get(key) + if err != nil { + return nil, err + } + if len(encRow)&3 != 0 { + return nil, errors.New("Invalid encoded filter row length") + } + row := make([]uint32, len(encRow)/4) + for i := range row { + row[i] = binary.LittleEndian.Uint32(encRow[i*4 : (i+1)*4]) + } + return row, nil +} + +// WriteFilterMapRow stores a filter map row at the given mapRowIndex or deletes +// any existing entry if the row is empty. 
+func WriteFilterMapRow(db ethdb.KeyValueWriter, mapRowIndex uint64, row []uint32) { + var err error + if len(row) > 0 { + encRow := make([]byte, len(row)*4) + for i, c := range row { + binary.LittleEndian.PutUint32(encRow[i*4:(i+1)*4], c) + } + err = db.Put(filterMapRowKey(mapRowIndex), encRow) + } else { + err = db.Delete(filterMapRowKey(mapRowIndex)) + } + if err != nil { + log.Crit("Failed to store filter map row", "err", err) + } +} + +// ReadFilterMapBlockPtr retrieves the number of the block that generated the +// first log value entry of the given map. +func ReadFilterMapBlockPtr(db ethdb.KeyValueReader, mapIndex uint32) (uint64, error) { + encPtr, err := db.Get(filterMapBlockPtrKey(mapIndex)) + if err != nil { + return 0, err + } + if len(encPtr) != 8 { + return 0, errors.New("Invalid block number encoding") + } + return binary.BigEndian.Uint64(encPtr), nil +} + +// WriteFilterMapBlockPtr stores the number of the block that generated the +// first log value entry of the given map. +func WriteFilterMapBlockPtr(db ethdb.KeyValueWriter, mapIndex uint32, blockNumber uint64) { + var encPtr [8]byte + binary.BigEndian.PutUint64(encPtr[:], blockNumber) + if err := db.Put(filterMapBlockPtrKey(mapIndex), encPtr[:]); err != nil { + log.Crit("Failed to store filter map block pointer", "err", err) + } +} + +// DeleteFilterMapBlockPtr deletes the number of the block that generated the +// first log value entry of the given map. +func DeleteFilterMapBlockPtr(db ethdb.KeyValueWriter, mapIndex uint32) { + if err := db.Delete(filterMapBlockPtrKey(mapIndex)); err != nil { + log.Crit("Failed to delete filter map block pointer", "err", err) + } +} + +// ReadBlockLvPointer retrieves the starting log value index where the log values +// generated by the given block are located. 
+func ReadBlockLvPointer(db ethdb.KeyValueReader, blockNumber uint64) (uint64, error) { + encPtr, err := db.Get(blockLVKey(blockNumber)) + if err != nil { + return 0, err + } + if len(encPtr) != 8 { + return 0, errors.New("Invalid log value pointer encoding") + } + return binary.BigEndian.Uint64(encPtr), nil +} + +// WriteBlockLvPointer stores the starting log value index where the log values +// generated by the given block are located. +func WriteBlockLvPointer(db ethdb.KeyValueWriter, blockNumber, lvPointer uint64) { + var encPtr [8]byte + binary.BigEndian.PutUint64(encPtr[:], lvPointer) + if err := db.Put(blockLVKey(blockNumber), encPtr[:]); err != nil { + log.Crit("Failed to store block log value pointer", "err", err) + } +} + +// DeleteBlockLvPointer deletes the starting log value index where the log values +// generated by the given block are located. +func DeleteBlockLvPointer(db ethdb.KeyValueWriter, blockNumber uint64) { + if err := db.Delete(blockLVKey(blockNumber)); err != nil { + log.Crit("Failed to delete block log value pointer", "err", err) + } +} + +// FilterMapsRange is a storage representation of the block range covered by the +// filter maps structure and the corresponting log value index range. +type FilterMapsRange struct { + Initialized bool + HeadLvPointer, TailLvPointer uint64 + HeadBlockNumber, TailBlockNumber uint64 + HeadBlockHash, TailParentHash common.Hash +} + +// ReadFilterMapsRange retrieves the filter maps range data. Note that if the +// database entry is not present, that is interpreted as a valid non-initialized +// state and returns a blank range structure and no error. 
+func ReadFilterMapsRange(db ethdb.KeyValueReader) (FilterMapsRange, error) { + if has, err := db.Has(filterMapsRangeKey); !has || err != nil { + return FilterMapsRange{}, err + } + encRange, err := db.Get(filterMapsRangeKey) + if err != nil { + return FilterMapsRange{}, err + } + var fmRange FilterMapsRange + if err := rlp.DecodeBytes(encRange, &fmRange); err != nil { + return FilterMapsRange{}, err + } + return fmRange, err +} + +// WriteFilterMapsRange stores the filter maps range data. +func WriteFilterMapsRange(db ethdb.KeyValueWriter, fmRange FilterMapsRange) { + encRange, err := rlp.EncodeToBytes(&fmRange) + if err != nil { + log.Crit("Failed to encode filter maps range", "err", err) + } + if err := db.Put(filterMapsRangeKey, encRange); err != nil { + log.Crit("Failed to store filter maps range", "err", err) + } +} + +// DeleteFilterMapsRange deletes the filter maps range data which is interpreted +// as reverting to the un-initialized state. +func DeleteFilterMapsRange(db ethdb.KeyValueWriter) { + if err := db.Delete(filterMapsRangeKey); err != nil { + log.Crit("Failed to delete filter maps range", "err", err) + } +} + +// RevertPoint is the storage representation of a filter maps revert point. +type RevertPoint struct { + BlockHash common.Hash + MapIndex uint32 + RowLength []uint +} + +// ReadRevertPoint retrieves the revert point for the given block number if +// present. Note that revert points may or may not exist for any block number +// and a non-existent entry causes no error. +func ReadRevertPoint(db ethdb.KeyValueReader, blockNumber uint64) (*RevertPoint, error) { + key := revertPointKey(blockNumber) + if has, err := db.Has(key); !has || err != nil { + return nil, err + } + enc, err := db.Get(key) + if err != nil { + return nil, err + } + rp := new(RevertPoint) + if err := rlp.DecodeBytes(enc, rp); err != nil { + return nil, err + } + return rp, nil +} + +// WriteRevertPoint stores a revert point for the given block number. 
+func WriteRevertPoint(db ethdb.KeyValueWriter, blockNumber uint64, rp *RevertPoint) { + enc, err := rlp.EncodeToBytes(rp) + if err != nil { + log.Crit("Failed to encode revert point", "err", err) + } + if err := db.Put(revertPointKey(blockNumber), enc); err != nil { + log.Crit("Failed to store revert point", "err", err) + } +} + +// DeleteRevertPoint deletes the given revert point. +func DeleteRevertPoint(db ethdb.KeyValueWriter, blockNumber uint64) { + if err := db.Delete(revertPointKey(blockNumber)); err != nil { + log.Crit("Failed to delete revert point", "err", err) + } +} diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index 04b5d0d6d2c8..25c4d88a5463 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -145,6 +145,13 @@ var ( FixedCommitteeRootKey = []byte("fixedRoot-") // bigEndian64(syncPeriod) -> committee root hash SyncCommitteeKey = []byte("committee-") // bigEndian64(syncPeriod) -> serialized committee + FilterMapsPrefix = []byte("fT5-") //TODO fm- + filterMapsRangeKey = append(FilterMapsPrefix, byte('R')) + filterMapRowPrefix = append(FilterMapsPrefix, byte('r')) // filterMapRowPrefix + mapRowIndex (uint64 big endian) -> filter row + filterMapBlockPtrPrefix = append(FilterMapsPrefix, byte('b')) // filterMapBlockPtrPrefix + mapIndex (uint32 big endian) -> block number (uint64 big endian) + blockLVPrefix = append(FilterMapsPrefix, byte('p')) // blockLVPrefix + num (uint64 big endian) -> log value pointer (uint64 big endian) + revertPointPrefix = append(FilterMapsPrefix, byte('v')) // revertPointPrefix + num (uint64 big endian) -> revert data + preimageCounter = metrics.NewRegisteredCounter("db/preimage/total", nil) preimageHitCounter = metrics.NewRegisteredCounter("db/preimage/hits", nil) ) @@ -346,3 +353,27 @@ func IsStorageTrieNode(key []byte) bool { ok, _, _ := ResolveStorageTrieNode(key) return ok } + +// filterMapRowKey = filterMapRowPrefix + mapRowIndex (uint64 big endian) +func filterMapRowKey(mapRowIndex uint64) []byte { + 
key := append(filterMapRowPrefix, make([]byte, 8)...) + binary.BigEndian.PutUint64(key[1:], mapRowIndex) + return key +} + +// filterMapBlockPtrKey = filterMapBlockPtrPrefix + mapIndex (uint32 big endian) +func filterMapBlockPtrKey(mapIndex uint32) []byte { + key := append(filterMapBlockPtrPrefix, make([]byte, 4)...) + binary.BigEndian.PutUint32(key[1:], mapIndex) + return key +} + +// blockLVKey = blockLVPrefix + num (uint64 big endian) +func blockLVKey(number uint64) []byte { + return append(blockLVPrefix, encodeBlockNumber(number)...) +} + +// revertPointKey = revertPointPrefix + num (uint64 big endian) +func revertPointKey(number uint64) []byte { + return append(revertPointPrefix, encodeBlockNumber(number)...) +} diff --git a/eth/api_backend.go b/eth/api_backend.go index 8a9898b956f3..f00b12d40ec2 100644 --- a/eth/api_backend.go +++ b/eth/api_backend.go @@ -29,6 +29,7 @@ import ( "github.com/ethereum/go-ethereum/consensus/misc/eip4844" "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/bloombits" + "github.com/ethereum/go-ethereum/core/filtermaps" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state" "github.com/ethereum/go-ethereum/core/txpool" @@ -44,6 +45,7 @@ import ( // EthAPIBackend implements ethapi.Backend and tracers.Backend for full nodes type EthAPIBackend struct { + *filtermaps.FilterMapsMatcherBackend extRPCEnabled bool allowUnprotectedTxs bool eth *Ethereum diff --git a/eth/backend.go b/eth/backend.go index f10d99c3a70b..c8059f3f35c1 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -30,6 +30,7 @@ import ( "github.com/ethereum/go-ethereum/consensus" "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/bloombits" + "github.com/ethereum/go-ethereum/core/filtermaps" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state/pruner" "github.com/ethereum/go-ethereum/core/txpool" @@ -83,6 +84,8 @@ type Ethereum struct { bloomIndexer 
*core.ChainIndexer // Bloom indexer operating during block imports closeBloomHandler chan struct{} + filterMaps *filtermaps.FilterMaps + APIBackend *EthAPIBackend miner *miner.Miner @@ -220,6 +223,7 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) { return nil, err } eth.bloomIndexer.Start(eth.blockchain) + eth.filterMaps = filtermaps.NewFilterMaps(chainDb, eth.blockchain) if config.BlobPool.Datadir != "" { config.BlobPool.Datadir = stack.ResolvePath(config.BlobPool.Datadir) @@ -254,7 +258,13 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) { eth.miner = miner.New(eth, config.Miner, eth.engine) eth.miner.SetExtra(makeExtraData(config.Miner.ExtraData)) - eth.APIBackend = &EthAPIBackend{stack.Config().ExtRPCEnabled(), stack.Config().AllowUnprotectedTxs, eth, nil} + eth.APIBackend = &EthAPIBackend{ + FilterMapsMatcherBackend: (*filtermaps.FilterMapsMatcherBackend)(eth.filterMaps), + extRPCEnabled: stack.Config().ExtRPCEnabled(), + allowUnprotectedTxs: stack.Config().AllowUnprotectedTxs, + eth: eth, + gpo: nil, + } if eth.APIBackend.allowUnprotectedTxs { log.Info("Unprotected transactions allowed") } @@ -406,6 +416,7 @@ func (s *Ethereum) Stop() error { // Then stop everything else. 
s.bloomIndexer.Close() + s.filterMaps.Close() close(s.closeBloomHandler) s.txPool.Close() s.blockchain.Stop() diff --git a/eth/filters/filter.go b/eth/filters/filter.go index 09ccb939073a..e3d1adc5fec4 100644 --- a/eth/filters/filter.go +++ b/eth/filters/filter.go @@ -19,11 +19,16 @@ package filters import ( "context" "errors" + "fmt" "math/big" + + //"reflect" "slices" + "time" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/bloombits" + "github.com/ethereum/go-ethereum/core/filtermaps" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/rpc" ) @@ -35,8 +40,9 @@ type Filter struct { addresses []common.Address topics [][]common.Hash - block *common.Hash // Block hash if filtering a single block - begin, end int64 // Range interval if filtering multiple blocks + block *common.Hash // Block hash if filtering a single block + begin, end int64 // Range interval if filtering multiple blocks + bbMatchCount uint64 matcher *bloombits.Matcher } @@ -148,16 +154,28 @@ func (f *Filter) Logs(ctx context.Context) ([]*types.Log, error) { return nil, err } - logChan, errChan := f.rangeLogsAsync(ctx) - var logs []*types.Log - for { - select { - case log := <-logChan: - logs = append(logs, log) - case err := <-errChan: - return logs, err + start := time.Now() + logs, err := filtermaps.GetPotentialMatches(ctx, f.sys.backend, uint64(f.begin), uint64(f.end), f.addresses, f.topics) + fmLogs := filterLogs(logs, nil, nil, f.addresses, f.topics) + fmt.Println("filtermaps (new) runtime", time.Since(start), "true matches", len(fmLogs), "false positives", len(logs)-len(fmLogs)) + + //TODO remove + /*f.bbMatchCount = 0 + start = time.Now() + logChan, errChan := f.rangeLogsAsync(ctx) + var bbLogs []*types.Log + loop: + for { + select { + case log := <-logChan: + bbLogs = append(bbLogs, log) + case <-errChan: + break loop + } } - } + fmt.Println("bloombits (old) runtime", time.Since(start), "true matches", len(bbLogs), "false positives", 
f.bbMatchCount-uint64(len(bbLogs))) + fmt.Println("DeepEqual", reflect.DeepEqual(fmLogs, bbLogs))*/ + return fmLogs, err } // rangeLogsAsync retrieves block-range logs that match the filter criteria asynchronously, @@ -218,6 +236,7 @@ func (f *Filter) indexedLogs(ctx context.Context, end uint64, logChan chan *type for { select { case number, ok := <-matches: + f.bbMatchCount++ // Abort if all matches have been fulfilled if !ok { err := session.Error() diff --git a/eth/filters/filter_system.go b/eth/filters/filter_system.go index a3a2787a4144..62f4833607b8 100644 --- a/eth/filters/filter_system.go +++ b/eth/filters/filter_system.go @@ -30,6 +30,7 @@ import ( "github.com/ethereum/go-ethereum/common/lru" "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/bloombits" + "github.com/ethereum/go-ethereum/core/filtermaps" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/event" @@ -71,6 +72,10 @@ type Backend interface { BloomStatus() (uint64, uint64) ServiceFilter(ctx context.Context, session *bloombits.MatcherSession) + + GetBlockLvPointer(ctx context.Context, blockNumber uint64) (uint64, error) + GetFilterMapRow(ctx context.Context, mapIndex, rowIndex uint32) (filtermaps.FilterRow, error) + GetLogByLvIndex(ctx context.Context, lvIndex uint64) (*types.Log, error) } // FilterSystem holds resources shared by all filters. 
diff --git a/internal/ethapi/backend.go b/internal/ethapi/backend.go index 2a45ba09210f..2012f3d83520 100644 --- a/internal/ethapi/backend.go +++ b/internal/ethapi/backend.go @@ -28,6 +28,7 @@ import ( "github.com/ethereum/go-ethereum/consensus" "github.com/ethereum/go-ethereum/core" "github.com/ethereum/go-ethereum/core/bloombits" + "github.com/ethereum/go-ethereum/core/filtermaps" "github.com/ethereum/go-ethereum/core/state" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/core/vm" @@ -97,6 +98,10 @@ type Backend interface { SubscribeLogsEvent(ch chan<- []*types.Log) event.Subscription BloomStatus() (uint64, uint64) ServiceFilter(ctx context.Context, session *bloombits.MatcherSession) + + GetBlockLvPointer(ctx context.Context, blockNumber uint64) (uint64, error) + GetFilterMapRow(ctx context.Context, mapIndex, rowIndex uint32) (filtermaps.FilterRow, error) + GetLogByLvIndex(ctx context.Context, lvIndex uint64) (*types.Log, error) } func GetAPIs(apiBackend Backend) []rpc.API { From 61d04dc04cec7a92385bb38802fd2563ec79fa91 Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Tue, 17 Sep 2024 09:55:55 +0200 Subject: [PATCH 02/23] core/filtermaps: use rawdb.ReadRawReceipts --- core/filtermaps/filtermaps.go | 6 +++--- core/filtermaps/indexer.go | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/filtermaps/filtermaps.go b/core/filtermaps/filtermaps.go index a265696041fa..87fec8a0a3aa 100644 --- a/core/filtermaps/filtermaps.go +++ b/core/filtermaps/filtermaps.go @@ -36,7 +36,7 @@ const ( // https://eips.ethereum.org/EIPS/eip-7745 type FilterMaps struct { lock sync.RWMutex - db ethdb.KeyValueStore + db ethdb.Database closeCh chan chan struct{} filterMapsRange @@ -86,7 +86,7 @@ type filterMapsRange struct { // NewFilterMaps creates a new FilterMaps and starts the indexer in order to keep // the structure in sync with the given blockchain. 
-func NewFilterMaps(db ethdb.KeyValueStore, chain *core.BlockChain) *FilterMaps { +func NewFilterMaps(db ethdb.Database, chain *core.BlockChain) *FilterMaps { rs, err := rawdb.ReadFilterMapsRange(db) if err != nil { log.Error("Error reading log index range", "error", err) @@ -298,7 +298,7 @@ func (f *FilterMaps) getLogByLvIndex(lvIndex uint64) (*types.Log, error) { } // get block receipts hash := f.chain.GetCanonicalHash(firstBlockNumber) - receipts := f.chain.GetReceiptsByHash(hash) //TODO small cache + receipts := rawdb.ReadRawReceipts(f.db, hash, firstBlockNumber) //TODO small cache if receipts == nil { return nil, errors.New("receipts not found") } diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go index 789bf4c4004d..0d050923ee3b 100644 --- a/core/filtermaps/indexer.go +++ b/core/filtermaps/indexer.go @@ -107,7 +107,7 @@ func (f *FilterMaps) getRange() filterMapsRange { // tryInit attempts to initialize the log index structure. func (f *FilterMaps) tryInit(head *types.Header) { - receipts := f.chain.GetReceiptsByHash(head.Hash()) + receipts := rawdb.ReadRawReceipts(f.db, head.Hash(), head.Number.Uint64()) if receipts == nil { log.Error("Could not retrieve block receipts for init block", "number", head.Number, "hash", head.Hash()) return @@ -176,7 +176,7 @@ func (f *FilterMaps) tryUpdateHead(newHead *types.Header) { update := f.newUpdateBatch() for i := len(newHeaders) - 1; i >= 0; i-- { newHeader := newHeaders[i] - receipts := f.chain.GetReceiptsByHash(newHeader.Hash()) + receipts := rawdb.ReadRawReceipts(f.db, newHeader.Hash(), newHeader.Number.Uint64()) if receipts == nil { log.Error("Could not retrieve block receipts for new block", "number", newHeader.Number, "hash", newHeader.Hash()) break @@ -218,7 +218,7 @@ func (f *FilterMaps) tryExtendTail(stopFn func() bool) { log.Error("Tail header not found", "number", number-1, "hash", parentHash) break } - receipts := f.chain.GetReceiptsByHash(newTail.Hash()) + receipts := 
rawdb.ReadRawReceipts(f.db, newTail.Hash(), newTail.Number.Uint64()) if receipts == nil { log.Error("Could not retrieve block receipts for tail block", "number", newTail.Number, "hash", newTail.Hash()) break From 8fe1504553b96ad643a0fae414ec396b3bfa2da6 Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Tue, 17 Sep 2024 09:56:48 +0200 Subject: [PATCH 03/23] core/filtermaps: add filtermaps tests --- core/filtermaps/filtermaps_test.go | 112 +++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 core/filtermaps/filtermaps_test.go diff --git a/core/filtermaps/filtermaps_test.go b/core/filtermaps/filtermaps_test.go new file mode 100644 index 000000000000..70a4ce3b14ca --- /dev/null +++ b/core/filtermaps/filtermaps_test.go @@ -0,0 +1,112 @@ +package filtermaps + +import ( + "math/rand" + "testing" + + "github.com/ethereum/go-ethereum/common" +) + +func TestSingleMatch(t *testing.T) { + for count := 0; count < 100000; count++ { + // generate a row with a single random entry + mapIndex := rand.Uint32() + lvIndex := uint64(mapIndex)< 0; i-- { + j := rand.Intn(i) + row[i], row[j] = row[j], row[i] + } + // check retrieved matches while also counting false positives + for i, lvHash := range lvHashes { + matches := row.potentialMatches(mapIndex, lvHash) + if i < testPmLen { + // check single entry match + if len(matches) < 1 { + t.Fatalf("Invalid length of matches (got %d, expected >=1)", len(matches)) + } + var found bool + for _, lvi := range matches { + if lvi == lvIndices[i] { + found = true + } else { + falsePositives++ + } + } + if !found { + t.Fatalf("Expected match not found (got %v, expected %d)", matches, lvIndices[i]) + } + } else { + // check "long series" match + if len(matches) < testPmLen { + t.Fatalf("Invalid length of matches (got %d, expected >=%d)", len(matches), testPmLen) + } + // since results are ordered, first testPmLen entries should always match exactly + for j := 0; j < testPmLen; j++ { + if matches[j] != lvStart+uint64(j) 
{ + t.Fatalf("Incorrect match at index %d (got %d, expected %d)", j, matches[j], lvStart+uint64(j)) + } + } + // the rest are false positives + falsePositives += len(matches) - testPmLen + } + } + } + // Whenever looking for a certain log value hash, each entry in the row that + // was generated by another log value hash (a "foreign entry") has an + // 1 / valuesPerMap chance of yielding a false positive. + // We have testPmLen unique hash entries and a testPmLen long series of entries + // for the same hash. For each of the testPmLen unique hash entries there are + // testPmLen*2-1 foreign entries while for the long series there are testPmLen + // foreign entries. This means that after performing all these filtering runs, + // we have processed 2*testPmLen^2 foreign entries, which given us an estimate + // of how many false positives to expect. + expFalse := testPmCount * testPmLen * testPmLen * 2 / valuesPerMap + if falsePositives < expFalse/2 || falsePositives > expFalse*3/2 { + t.Fatalf("False positive rate out of expected range (got %d, expected %d +-50%%)", falsePositives, expFalse) + } +} From 27652f2d4f3a09e03c479be6aa8a35fdfb4c3103 Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Thu, 19 Sep 2024 17:14:27 +0200 Subject: [PATCH 04/23] core/filtermaps: safe concurrent index update and search --- core/filtermaps/filtermaps.go | 65 ++++-------- core/filtermaps/indexer.go | 56 +++++++--- core/filtermaps/matcher.go | 117 ++++++++++++++++++++- core/filtermaps/matcher_backend.go | 158 +++++++++++++++++++++++++++++ eth/api_backend.go | 5 +- eth/backend.go | 1 - eth/filters/filter.go | 41 ++------ eth/filters/filter_system.go | 4 +- internal/ethapi/backend.go | 4 +- 9 files changed, 348 insertions(+), 103 deletions(-) create mode 100644 core/filtermaps/matcher_backend.go diff --git a/core/filtermaps/filtermaps.go b/core/filtermaps/filtermaps.go index 87fec8a0a3aa..8f5e436d9f2b 100644 --- a/core/filtermaps/filtermaps.go +++ b/core/filtermaps/filtermaps.go @@ -1,7 
+1,6 @@ package filtermaps import ( - "context" "crypto/sha256" "encoding/binary" "errors" @@ -14,6 +13,7 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/event" "github.com/ethereum/go-ethereum/log" ) @@ -28,6 +28,14 @@ const ( headCacheSize = 8 // maximum number of recent filter maps cached in memory ) +// blockchain defines functions required by the FilterMaps log indexer. +type blockchain interface { + CurrentBlock() *types.Header + SubscribeChainHeadEvent(ch chan<- core.ChainHeadEvent) event.Subscription + GetHeader(hash common.Hash, number uint64) *types.Header + GetCanonicalHash(number uint64) common.Hash +} + // FilterMaps is the in-memory representation of the log index structure that is // responsible for building and updating the index according to the canonical // chain. @@ -38,10 +46,10 @@ type FilterMaps struct { lock sync.RWMutex db ethdb.Database closeCh chan chan struct{} - filterMapsRange - chain *core.BlockChain - + chain blockchain + matcherSyncCh chan *FilterMapsMatcherBackend + matchers map[*FilterMapsMatcherBackend]struct{} // filterMapCache caches certain filter maps (headCacheSize most recent maps // and one tail map) that are expected to be frequently accessed and modified // while updating the structure. Note that the set of cached maps depends @@ -86,7 +94,7 @@ type filterMapsRange struct { // NewFilterMaps creates a new FilterMaps and starts the indexer in order to keep // the structure in sync with the given blockchain. 
-func NewFilterMaps(db ethdb.Database, chain *core.BlockChain) *FilterMaps { +func NewFilterMaps(db ethdb.Database, chain blockchain) *FilterMaps { rs, err := rawdb.ReadFilterMapsRange(db) if err != nil { log.Error("Error reading log index range", "error", err) @@ -104,6 +112,8 @@ func NewFilterMaps(db ethdb.Database, chain *core.BlockChain) *FilterMaps { headBlockHash: rs.HeadBlockHash, tailParentHash: rs.TailParentHash, }, + matcherSyncCh: make(chan *FilterMapsMatcherBackend), + matchers: make(map[*FilterMapsMatcherBackend]struct{}), filterMapCache: make(map[uint32]*filterMap), blockPtrCache: lru.NewCache[uint32, uint64](1000), lvPointerCache: lru.NewCache[uint64, uint64](1000), @@ -129,46 +139,6 @@ func (f *FilterMaps) Close() { <-ch } -// FilterMapsMatcherBackend implements MatcherBackend. -type FilterMapsMatcherBackend FilterMaps - -// GetFilterMapRow returns the given row of the given map. If the row is empty -// then a non-nil zero length row is returned. -// Note that the returned slices should not be modified, they should be copied -// on write. -// GetFilterMapRow implements MatcherBackend. -func (ff *FilterMapsMatcherBackend) GetFilterMapRow(ctx context.Context, mapIndex, rowIndex uint32) (FilterRow, error) { - f := (*FilterMaps)(ff) - return f.getFilterMapRow(mapIndex, rowIndex) -} - -// GetBlockLvPointer returns the starting log value index where the log values -// generated by the given block are located. If blockNumber is beyond the current -// head then the first unoccupied log value index is returned. -// GetBlockLvPointer implements MatcherBackend. -func (ff *FilterMapsMatcherBackend) GetBlockLvPointer(ctx context.Context, blockNumber uint64) (uint64, error) { - f := (*FilterMaps)(ff) - f.lock.RLock() - defer f.lock.RUnlock() - - return f.getBlockLvPointer(blockNumber) -} - -// GetLogByLvIndex returns the log at the given log value index. 
If the index does -// not point to the first log value entry of a log then no log and no error are -// returned as this can happen when the log value index was a false positive. -// Note that this function assumes that the log index structure is consistent -// with the canonical chain at the point where the given log value index points. -// If this is not the case then an invalid result or an error may be returned. -// GetLogByLvIndex implements MatcherBackend. -func (ff *FilterMapsMatcherBackend) GetLogByLvIndex(ctx context.Context, lvIndex uint64) (*types.Log, error) { - f := (*FilterMaps)(ff) - f.lock.RLock() - defer f.lock.RUnlock() - - return f.getLogByLvIndex(lvIndex) -} - // reset un-initializes the FilterMaps structure and removes all related data from // the database. // Note that this function assumes that the read/write lock is being held. @@ -224,6 +194,7 @@ func (f *FilterMaps) setRange(batch ethdb.Batch, newRange filterMapsRange) { } rawdb.WriteFilterMapsRange(batch, rs) f.updateMapCache() + f.updateMatchersValidRange() } // updateMapCache updates the maps covered by the filterMapCache according to the @@ -266,7 +237,7 @@ func (f *FilterMaps) updateMapCache() { // Note that this function assumes that the read lock is being held. func (f *FilterMaps) getLogByLvIndex(lvIndex uint64) (*types.Log, error) { if lvIndex < f.tailLvPointer || lvIndex > f.headLvPointer { - return nil, errors.New("log value index outside available range") + return nil, nil } // find possible block range based on map to block pointers mapIndex := uint32(lvIndex >> logValuesPerMap) @@ -321,7 +292,7 @@ func (f *FilterMaps) getLogByLvIndex(lvIndex uint64) (*types.Log, error) { lvPointer += uint64(len(log.Topics) + 1) } } - return nil, errors.New("log value index not found") + return nil, nil } // getFilterMapRow returns the given row of the given map. 
If the row is empty diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go index 0d050923ee3b..a9b1b56136fd 100644 --- a/core/filtermaps/indexer.go +++ b/core/filtermaps/indexer.go @@ -22,30 +22,35 @@ const ( // updateLoop initializes and updates the log index structure according to the // canonical chain. func (f *FilterMaps) updateLoop() { - headEventCh := make(chan core.ChainHeadEvent) - sub := f.chain.SubscribeChainHeadEvent(headEventCh) - defer sub.Unsubscribe() + var ( + headEventCh = make(chan core.ChainHeadEvent) + sub = f.chain.SubscribeChainHeadEvent(headEventCh) + head *types.Header + stop bool + syncMatcher *FilterMapsMatcherBackend + ) - head := f.chain.CurrentBlock() - if head == nil { - select { - case ev := <-headEventCh: - head = ev.Block.Header() - case ch := <-f.closeCh: - close(ch) - return + defer func() { + sub.Unsubscribe() + if syncMatcher != nil { + syncMatcher.synced(head) + syncMatcher = nil } - } - fmr := f.getRange() + }() - var stop bool wait := func() { + if syncMatcher != nil { + syncMatcher.synced(head) + syncMatcher = nil + } if stop { return } select { case ev := <-headEventCh: head = ev.Block.Header() + case syncMatcher = <-f.matcherSyncCh: + head = f.chain.CurrentBlock() case <-time.After(time.Second * 20): // keep updating log index during syncing head = f.chain.CurrentBlock() @@ -54,10 +59,21 @@ func (f *FilterMaps) updateLoop() { stop = true } } + for head == nil { + wait() + if stop { + return + } + } + fmr := f.getRange() for !stop { if !fmr.initialized { f.tryInit(head) + if syncMatcher != nil { + syncMatcher.synced(head) + syncMatcher = nil + } fmr = f.getRange() if !fmr.initialized { wait() @@ -73,12 +89,18 @@ func (f *FilterMaps) updateLoop() { continue } } + if syncMatcher != nil { + syncMatcher.synced(head) + syncMatcher = nil + } // log index head is at latest chain head; process tail blocks if possible f.tryExtendTail(func() bool { // return true if tail processing needs to be stopped select { case ev 
:= <-headEventCh: head = ev.Block.Header() + case syncMatcher = <-f.matcherSyncCh: + head = f.chain.CurrentBlock() case ch := <-f.closeCh: close(ch) stop = true @@ -549,6 +571,9 @@ func (u *updateBatch) makeRevertPoint() (*revertPoint, error) { // number from memory cache or from the database if available. If no such revert // point is available then it returns no result and no error. func (f *FilterMaps) getRevertPoint(blockNumber uint64) (*revertPoint, error) { + f.lock.RLock() + defer f.lock.RUnlock() + if blockNumber > f.headBlockNumber { blockNumber = f.headBlockNumber } @@ -577,6 +602,9 @@ func (f *FilterMaps) getRevertPoint(blockNumber uint64) (*revertPoint, error) { // revertTo reverts the log index to the given revert point. func (f *FilterMaps) revertTo(rp *revertPoint) error { + f.lock.Lock() + defer f.lock.Unlock() + batch := f.db.NewBatch() afterLastMap := uint32((f.headLvPointer + valuesPerMap - 1) >> logValuesPerMap) if rp.mapIndex >= afterLastMap { diff --git a/core/filtermaps/matcher.go b/core/filtermaps/matcher.go index 3bc08494e19a..8e2d236b6b6c 100644 --- a/core/filtermaps/matcher.go +++ b/core/filtermaps/matcher.go @@ -2,6 +2,7 @@ package filtermaps import ( "context" + "errors" "math" "sync" "sync/atomic" @@ -18,13 +19,121 @@ type MatcherBackend interface { GetBlockLvPointer(ctx context.Context, blockNumber uint64) (uint64, error) GetFilterMapRow(ctx context.Context, mapIndex, rowIndex uint32) (FilterRow, error) GetLogByLvIndex(ctx context.Context, lvIndex uint64) (*types.Log, error) + SyncLogIndex(ctx context.Context) (SyncRange, error) + Close() +} + +// SyncRange is returned by MatcherBackend.SyncLogIndex. It contains the latest +// chain head, the indexed range that is currently consistent with the chain +// and the valid range that has not been changed and has been consistent with +// all states of the chain since the previous SyncLogIndex or the creation of +// the matcher backend. 
+type SyncRange struct { + Head *types.Header + // block range where the index has not changed since the last matcher sync + // and therefore the set of matches found in this region is guaranteed to + // be valid and complete. + Valid bool + FirstValid, LastValid uint64 + // block range indexed according to the given chain head. + Indexed bool + FirstIndexed, LastIndexed uint64 } // GetPotentialMatches returns a list of logs that are potential matches for the -// given filter criteria. Note that the returned list may still contain false -// positives. -//TODO add protection against reorgs during search -func GetPotentialMatches(ctx context.Context, backend MatcherBackend, firstBlock, lastBlock uint64, addresses []common.Address, topics [][]common.Hash) ([]*types.Log, error) { +// given filter criteria. If parts of the requested range are not indexed then +// an error is returned. If parts of the requested range are changed during the +// search process then potentially incorrect logs are discarded and searched +// again, ensuring that the returned results are always consistent with the latest +// state of the chain. +// If firstBlock or lastBlock are bigger than the head block number then they are +// substituted with the latest head of the chain, ensuring that a search until +// the head block is still consistent with the latest canonical chain if a new +// head has been added during the process. +// Note that the returned list may still contain false positives. 
+func GetPotentialMatches(ctx context.Context, backend MatcherBackend, firstBlock, lastBlock uint64, addresses []common.Address, topics [][]common.Hash) ([]*types.Log, *types.Header, uint64, uint64, error) { + if firstBlock > lastBlock { + return nil, nil, 0, 0, errors.New("invalid search range") + } + // enforce a consistent state before starting the search in order to be able + // to determine valid range later + syncRange, err := backend.SyncLogIndex(ctx) + if err != nil { + return nil, nil, 0, 0, err + } + headBlock := syncRange.Head.Number.Uint64() // Head is guaranteed != nil + // if haveMatches == true then matches correspond to the block number range + // between matchFirst and matchLast + var ( + matches []*types.Log + haveMatches bool + matchFirst, matchLast uint64 + ) + for !haveMatches || (matchLast < lastBlock && matchLast < headBlock) { + // determine range to be searched; for simplicity we only extend the most + // recent end of the existing match set by matching between searchFirst + // and searchLast. 
+ searchFirst, searchLast := firstBlock, lastBlock + if searchFirst > headBlock { + searchFirst = headBlock + } + if searchLast > headBlock { + searchLast = headBlock + } + if haveMatches && matchFirst != searchFirst { + // searchFirst might change if firstBlock > headBlock + matches, haveMatches = nil, false + } + if haveMatches && matchLast >= searchFirst { + searchFirst = matchLast + 1 + } + // check if indexed range covers the requested range + if !syncRange.Indexed || syncRange.FirstIndexed > searchFirst || syncRange.LastIndexed < searchLast { + return nil, nil, 0, 0, errors.New("log index not available for requested range") + } + // search for matches in the required range + newMatches, err := getPotentialMatches(ctx, backend, searchFirst, searchLast, addresses, topics) + if err != nil { + return nil, nil, 0, 0, err + } + // enforce a consistent state again in order to determine the guaranteed + // valid range in which the log index has not been changed since the last + // sync. + syncRange, err = backend.SyncLogIndex(ctx) + if err != nil { + return nil, nil, 0, 0, err + } + headBlock = syncRange.Head.Number.Uint64() + // return with error if the beginning of the recently searched range might + // be invalid due to removed log index + if !syncRange.Valid || syncRange.FirstValid > searchFirst || syncRange.LastValid < searchFirst { + return nil, nil, 0, 0, errors.New("log index not available for requested range") + } + // roll back most recent matches if they are not covered by the guaranteed + // valid range + if syncRange.LastValid < searchLast { + for len(newMatches) > 0 && newMatches[len(newMatches)-1].BlockNumber > syncRange.LastValid { + newMatches = newMatches[:len(newMatches)-1] + } + searchLast = syncRange.LastValid + } + // append new matches to existing ones if the were any + if haveMatches { + matches = append(matches, newMatches...) 
+ } else { + matches, haveMatches = newMatches, true + } + matchLast = searchLast + } + return matches, syncRange.Head, firstBlock, matchLast, nil +} + +// getPotentialMatches returns a list of logs that are potential matches for the +// given filter criteria. If parts of the log index in the searched range are +// missing or changed during the search process then the resulting logs belonging +// to that block range might be missing or incorrect. +// Also note that the returned list may contain false positives. +func getPotentialMatches(ctx context.Context, backend MatcherBackend, firstBlock, lastBlock uint64, addresses []common.Address, topics [][]common.Hash) ([]*types.Log, error) { // find the log value index range to search firstIndex, err := backend.GetBlockLvPointer(ctx, firstBlock) if err != nil { diff --git a/core/filtermaps/matcher_backend.go b/core/filtermaps/matcher_backend.go new file mode 100644 index 000000000000..b340167bb411 --- /dev/null +++ b/core/filtermaps/matcher_backend.go @@ -0,0 +1,158 @@ +package filtermaps + +import ( + "context" + "errors" + + "github.com/ethereum/go-ethereum/core/types" +) + +// FilterMapsMatcherBackend implements MatcherBackend. +type FilterMapsMatcherBackend struct { + f *FilterMaps + valid bool + firstValid, lastValid uint64 + syncCh chan SyncRange +} + +// NewMatcherBackend returns a FilterMapsMatcherBackend after registering it in +// the active matcher set. +// Note that Close should always be called when the matcher is no longer used. +func (f *FilterMaps) NewMatcherBackend() *FilterMapsMatcherBackend { + f.lock.Lock() + defer f.lock.Unlock() + + fm := &FilterMapsMatcherBackend{ + f: f, + valid: f.initialized, + firstValid: f.tailBlockNumber, + lastValid: f.headBlockNumber, + } + f.matchers[fm] = struct{}{} + return fm +} + +// updateMatchersValidRange iterates through active matchers and limits their +// valid range with the current indexed range. 
This function should be called +// whenever a part of the log index has been removed, before adding new blocks +// to it. +func (f *FilterMaps) updateMatchersValidRange() { + for fm := range f.matchers { + if !f.initialized { + fm.valid = false + } + if !fm.valid { + continue + } + if fm.firstValid < f.tailBlockNumber { + fm.firstValid = f.tailBlockNumber + } + if fm.lastValid > f.headBlockNumber { + fm.lastValid = f.headBlockNumber + } + if fm.firstValid > fm.lastValid { + fm.valid = false + } + } +} + +// Close removes the matcher from the set of active matchers and ensures that +// any SyncLogIndex calls are cancelled. +func (fm *FilterMapsMatcherBackend) Close() { + fm.f.lock.Lock() + defer fm.f.lock.Unlock() + + delete(fm.f.matchers, fm) +} + +// GetFilterMapRow returns the given row of the given map. If the row is empty +// then a non-nil zero length row is returned. +// Note that the returned slices should not be modified, they should be copied +// on write. +// GetFilterMapRow implements MatcherBackend. +func (fm *FilterMapsMatcherBackend) GetFilterMapRow(ctx context.Context, mapIndex, rowIndex uint32) (FilterRow, error) { + return fm.f.getFilterMapRow(mapIndex, rowIndex) +} + +// GetBlockLvPointer returns the starting log value index where the log values +// generated by the given block are located. If blockNumber is beyond the current +// head then the first unoccupied log value index is returned. +// GetBlockLvPointer implements MatcherBackend. +func (fm *FilterMapsMatcherBackend) GetBlockLvPointer(ctx context.Context, blockNumber uint64) (uint64, error) { + fm.f.lock.RLock() + defer fm.f.lock.RUnlock() + + return fm.f.getBlockLvPointer(blockNumber) +} + +// GetLogByLvIndex returns the log at the given log value index. +// Note that this function assumes that the log index structure is consistent +// with the canonical chain at the point where the given log value index points. 
+// If this is not the case then an invalid result may be returned or certain +// logs might not be returned at all. +// No error is returned though because of an inconsistency between the chain and +// the log index. It is the caller's responsibility to verify this consistency +// using SyncLogIndex and re-process certain blocks if necessary. +// GetLogByLvIndex implements MatcherBackend. +func (fm *FilterMapsMatcherBackend) GetLogByLvIndex(ctx context.Context, lvIndex uint64) (*types.Log, error) { + fm.f.lock.RLock() + defer fm.f.lock.RUnlock() + + return fm.f.getLogByLvIndex(lvIndex) +} + +// synced signals to the matcher that has triggered a synchronisation that it +// has been finished and the log index is consistent with the chain head passed +// as a parameter. +// Note that if the log index head was far behind the chain head then it might not +// be synced up to the given head in a single step. Still, the latest chain head +// should be passed as a parameter and the existing log index should be consistent +// with that chain. +func (fm *FilterMapsMatcherBackend) synced(head *types.Header) { + fm.f.lock.Lock() + defer fm.f.lock.Unlock() + + fm.syncCh <- SyncRange{ + Head: head, + Valid: fm.valid, + FirstValid: fm.firstValid, + LastValid: fm.lastValid, + Indexed: fm.f.initialized, + FirstIndexed: fm.f.tailBlockNumber, + LastIndexed: fm.f.headBlockNumber, + } + fm.valid = fm.f.initialized + fm.firstValid = fm.f.tailBlockNumber + fm.lastValid = fm.f.headBlockNumber + fm.syncCh = nil +} + +// SyncLogIndex ensures that the log index is consistent with the current state +// of the chain (note that it may or may not be actually synced up to the head). +// It blocks until this state is achieved. 
+// If successful, it returns a SyncRange that contains the latest chain head, +// the indexed range that is currently consistent with the chain and the valid +// range that has not been changed and has been consistent with all states of the +// chain since the previous SyncLogIndex or the creation of the matcher backend. +func (fm *FilterMapsMatcherBackend) SyncLogIndex(ctx context.Context) (SyncRange, error) { + // add SyncRange return channel, ensuring that + syncCh := make(chan SyncRange, 1) + fm.f.lock.Lock() + fm.syncCh = syncCh + fm.f.lock.Unlock() + + select { + case fm.f.matcherSyncCh <- fm: + case <-ctx.Done(): + return SyncRange{}, ctx.Err() + } + select { + case vr := <-syncCh: + if vr.Head == nil { + return SyncRange{}, errors.New("canonical chain head not available") + } + return vr, nil + case <-ctx.Done(): + return SyncRange{}, ctx.Err() + } +} diff --git a/eth/api_backend.go b/eth/api_backend.go index f00b12d40ec2..ce5744017090 100644 --- a/eth/api_backend.go +++ b/eth/api_backend.go @@ -45,7 +45,6 @@ import ( // EthAPIBackend implements ethapi.Backend and tracers.Backend for full nodes type EthAPIBackend struct { - *filtermaps.FilterMapsMatcherBackend extRPCEnabled bool allowUnprotectedTxs bool eth *Ethereum @@ -418,6 +417,10 @@ func (b *EthAPIBackend) ServiceFilter(ctx context.Context, session *bloombits.Ma } } +func (b *EthAPIBackend) NewMatcherBackend() filtermaps.MatcherBackend { + return b.eth.filterMaps.NewMatcherBackend() +} + func (b *EthAPIBackend) Engine() consensus.Engine { return b.eth.engine } diff --git a/eth/backend.go b/eth/backend.go index c8059f3f35c1..902279e4ae55 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -259,7 +259,6 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) { eth.miner.SetExtra(makeExtraData(config.Miner.ExtraData)) eth.APIBackend = &EthAPIBackend{ - FilterMapsMatcherBackend: (*filtermaps.FilterMapsMatcherBackend)(eth.filterMaps), extRPCEnabled: stack.Config().ExtRPCEnabled(), 
allowUnprotectedTxs: stack.Config().AllowUnprotectedTxs, eth: eth, diff --git a/eth/filters/filter.go b/eth/filters/filter.go index e3d1adc5fec4..343fa5f0ff05 100644 --- a/eth/filters/filter.go +++ b/eth/filters/filter.go @@ -19,10 +19,8 @@ package filters import ( "context" "errors" - "fmt" + "math" "math/big" - - //"reflect" "slices" "time" @@ -30,6 +28,7 @@ import ( "github.com/ethereum/go-ethereum/core/bloombits" "github.com/ethereum/go-ethereum/core/filtermaps" "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/rpc" ) @@ -120,29 +119,26 @@ func (f *Filter) Logs(ctx context.Context) ([]*types.Log, error) { } resolveSpecial := func(number int64) (int64, error) { - var hdr *types.Header switch number { case rpc.LatestBlockNumber.Int64(), rpc.PendingBlockNumber.Int64(): // we should return head here since we've already captured // that we need to get the pending logs in the pending boolean above - hdr, _ = f.sys.backend.HeaderByNumber(ctx, rpc.LatestBlockNumber) - if hdr == nil { - return 0, errors.New("latest header not found") - } + return math.MaxInt64, nil case rpc.FinalizedBlockNumber.Int64(): - hdr, _ = f.sys.backend.HeaderByNumber(ctx, rpc.FinalizedBlockNumber) + hdr, _ := f.sys.backend.HeaderByNumber(ctx, rpc.FinalizedBlockNumber) if hdr == nil { return 0, errors.New("finalized header not found") } + return hdr.Number.Int64(), nil case rpc.SafeBlockNumber.Int64(): - hdr, _ = f.sys.backend.HeaderByNumber(ctx, rpc.SafeBlockNumber) + hdr, _ := f.sys.backend.HeaderByNumber(ctx, rpc.SafeBlockNumber) if hdr == nil { return 0, errors.New("safe header not found") } + return hdr.Number.Int64(), nil default: return number, nil } - return hdr.Number.Int64(), nil } var err error @@ -155,26 +151,11 @@ func (f *Filter) Logs(ctx context.Context) ([]*types.Log, error) { } start := time.Now() - logs, err := filtermaps.GetPotentialMatches(ctx, f.sys.backend, uint64(f.begin), uint64(f.end), f.addresses, 
f.topics) + mb := f.sys.backend.NewMatcherBackend() + logs, _, _, _, err := filtermaps.GetPotentialMatches(ctx, mb, uint64(f.begin), uint64(f.end), f.addresses, f.topics) + mb.Close() fmLogs := filterLogs(logs, nil, nil, f.addresses, f.topics) - fmt.Println("filtermaps (new) runtime", time.Since(start), "true matches", len(fmLogs), "false positives", len(logs)-len(fmLogs)) - - //TODO remove - /*f.bbMatchCount = 0 - start = time.Now() - logChan, errChan := f.rangeLogsAsync(ctx) - var bbLogs []*types.Log - loop: - for { - select { - case log := <-logChan: - bbLogs = append(bbLogs, log) - case <-errChan: - break loop - } - } - fmt.Println("bloombits (old) runtime", time.Since(start), "true matches", len(bbLogs), "false positives", f.bbMatchCount-uint64(len(bbLogs))) - fmt.Println("DeepEqual", reflect.DeepEqual(fmLogs, bbLogs))*/ + log.Debug("Finished log search", "run time", time.Since(start), "true matches", len(fmLogs), "false positives", len(logs)-len(fmLogs)) return fmLogs, err } diff --git a/eth/filters/filter_system.go b/eth/filters/filter_system.go index 62f4833607b8..45f03f16d62d 100644 --- a/eth/filters/filter_system.go +++ b/eth/filters/filter_system.go @@ -73,9 +73,7 @@ type Backend interface { BloomStatus() (uint64, uint64) ServiceFilter(ctx context.Context, session *bloombits.MatcherSession) - GetBlockLvPointer(ctx context.Context, blockNumber uint64) (uint64, error) - GetFilterMapRow(ctx context.Context, mapIndex, rowIndex uint32) (filtermaps.FilterRow, error) - GetLogByLvIndex(ctx context.Context, lvIndex uint64) (*types.Log, error) + NewMatcherBackend() filtermaps.MatcherBackend } // FilterSystem holds resources shared by all filters. 
diff --git a/internal/ethapi/backend.go b/internal/ethapi/backend.go index 2012f3d83520..96bf894b0a74 100644 --- a/internal/ethapi/backend.go +++ b/internal/ethapi/backend.go @@ -99,9 +99,7 @@ type Backend interface { BloomStatus() (uint64, uint64) ServiceFilter(ctx context.Context, session *bloombits.MatcherSession) - GetBlockLvPointer(ctx context.Context, blockNumber uint64) (uint64, error) - GetFilterMapRow(ctx context.Context, mapIndex, rowIndex uint32) (filtermaps.FilterRow, error) - GetLogByLvIndex(ctx context.Context, lvIndex uint64) (*types.Log, error) + NewMatcherBackend() filtermaps.MatcherBackend } func GetAPIs(apiBackend Backend) []rpc.API { From c04968b4c8ba8ef942428af3a185bf645552d796 Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Thu, 26 Sep 2024 03:49:01 +0200 Subject: [PATCH 05/23] core/filtermaps: revert to legacy filter in case of "match all" search --- core/filtermaps/matcher.go | 8 ++++++++ eth/filters/filter.go | 24 ++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/core/filtermaps/matcher.go b/core/filtermaps/matcher.go index 8e2d236b6b6c..02fb6ed82d34 100644 --- a/core/filtermaps/matcher.go +++ b/core/filtermaps/matcher.go @@ -11,6 +11,11 @@ import ( "github.com/ethereum/go-ethereum/core/types" ) +// ErrMatchAll is returned when the specified filter matches everything. +// Handling this case in filtermaps would require an extra special case and +// would actually be slower than reverting to legacy filter. +var ErrMatchAll = errors.New("match all patterns not supported") + // MatcherBackend defines the functions required for searching in the log index // data structure. 
It is currently implemented by FilterMapsMatcherBackend but // once EIP-7745 is implemented and active, these functions can also be trustlessly @@ -198,6 +203,9 @@ func getPotentialMatches(ctx context.Context, backend MatcherBackend, firstBlock } // get the actual logs located at the matching log value indices for _, m := range matches { + if m == nil { + return nil, ErrMatchAll + } mlogs, err := getLogsFromMatches(ctx, backend, firstIndex, lastIndex, m) if err != nil { return logs, err diff --git a/eth/filters/filter.go b/eth/filters/filter.go index 343fa5f0ff05..7b0be8d0244f 100644 --- a/eth/filters/filter.go +++ b/eth/filters/filter.go @@ -154,6 +154,30 @@ func (f *Filter) Logs(ctx context.Context) ([]*types.Log, error) { mb := f.sys.backend.NewMatcherBackend() logs, _, _, _, err := filtermaps.GetPotentialMatches(ctx, mb, uint64(f.begin), uint64(f.end), f.addresses, f.topics) mb.Close() + if err == filtermaps.ErrMatchAll { + // revert to legacy filter + hdr, _ := f.sys.backend.HeaderByNumber(ctx, rpc.LatestBlockNumber) + if hdr == nil { + return nil, errors.New("latest header not found") + } + headNumber := hdr.Number.Int64() + if f.begin > headNumber { + f.begin = headNumber + } + if f.end > headNumber { + f.end = headNumber + } + logChan, errChan := f.rangeLogsAsync(ctx) + var logs []*types.Log + for { + select { + case log := <-logChan: + logs = append(logs, log) + case err := <-errChan: + return logs, err + } + } + } fmLogs := filterLogs(logs, nil, nil, f.addresses, f.topics) log.Debug("Finished log search", "run time", time.Since(start), "true matches", len(fmLogs), "false positives", len(logs)-len(fmLogs)) return fmLogs, err From f187df1d612a0e6ca05e4c57051f8810e05c4908 Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Fri, 27 Sep 2024 03:49:41 +0200 Subject: [PATCH 06/23] core/bloombits, eth/filters: removed bloombits --- core/blockchain.go | 2 +- core/bloom_indexer.go | 92 ---- core/bloombits/doc.go | 18 - core/bloombits/generator.go | 98 ---- 
core/bloombits/generator_test.go | 100 ---- core/bloombits/matcher.go | 649 ----------------------- core/bloombits/matcher_test.go | 292 ---------- core/bloombits/scheduler.go | 181 ------- core/bloombits/scheduler_test.go | 103 ---- core/chain_indexer.go | 523 ------------------ core/chain_indexer_test.go | 246 --------- core/rawdb/accessors_indexes.go | 36 -- core/rawdb/accessors_indexes_test.go | 43 -- core/rawdb/database.go | 6 - core/rawdb/schema.go | 10 - eth/api_backend.go | 12 - eth/backend.go | 43 +- eth/bloombits.go | 74 --- eth/filters/bench_test.go | 189 ------- eth/filters/filter.go | 90 +--- eth/filters/filter_system.go | 4 - eth/filters/filter_system_test.go | 32 -- internal/ethapi/api_test.go | 6 - internal/ethapi/backend.go | 3 - internal/ethapi/transaction_args_test.go | 7 +- params/network_params.go | 8 - 26 files changed, 18 insertions(+), 2849 deletions(-) delete mode 100644 core/bloom_indexer.go delete mode 100644 core/bloombits/doc.go delete mode 100644 core/bloombits/generator.go delete mode 100644 core/bloombits/generator_test.go delete mode 100644 core/bloombits/matcher.go delete mode 100644 core/bloombits/matcher_test.go delete mode 100644 core/bloombits/scheduler.go delete mode 100644 core/bloombits/scheduler_test.go delete mode 100644 core/chain_indexer.go delete mode 100644 core/chain_indexer_test.go delete mode 100644 eth/bloombits.go delete mode 100644 eth/filters/bench_test.go diff --git a/core/blockchain.go b/core/blockchain.go index f7c921fe64fe..74a75e9ed325 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -896,7 +896,7 @@ func (bc *BlockChain) setHeadBeyondRoot(head uint64, time uint64, root common.Ha rawdb.DeleteBody(db, hash, num) rawdb.DeleteReceipts(db, hash, num) } - // Todo(rjl493456442) txlookup, bloombits, etc + // Todo(rjl493456442) txlookup, log index, etc } // If SetHead was only called as a chain reparation method, try to skip // touching the header chain altogether, unless the freezer is broken diff --git 
a/core/bloom_indexer.go b/core/bloom_indexer.go deleted file mode 100644 index 68a35d811e41..000000000000 --- a/core/bloom_indexer.go +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright 2021 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . - -package core - -import ( - "context" - "time" - - "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/common/bitutil" - "github.com/ethereum/go-ethereum/core/bloombits" - "github.com/ethereum/go-ethereum/core/rawdb" - "github.com/ethereum/go-ethereum/core/types" - "github.com/ethereum/go-ethereum/ethdb" -) - -const ( - // bloomThrottling is the time to wait between processing two consecutive index - // sections. It's useful during chain upgrades to prevent disk overload. - bloomThrottling = 100 * time.Millisecond -) - -// BloomIndexer implements a core.ChainIndexer, building up a rotated bloom bits index -// for the Ethereum header bloom filters, permitting blazing fast filtering. 
-type BloomIndexer struct { - size uint64 // section size to generate bloombits for - db ethdb.Database // database instance to write index data and metadata into - gen *bloombits.Generator // generator to rotate the bloom bits crating the bloom index - section uint64 // Section is the section number being processed currently - head common.Hash // Head is the hash of the last header processed -} - -// NewBloomIndexer returns a chain indexer that generates bloom bits data for the -// canonical chain for fast logs filtering. -func NewBloomIndexer(db ethdb.Database, size, confirms uint64) *ChainIndexer { - backend := &BloomIndexer{ - db: db, - size: size, - } - table := rawdb.NewTable(db, string(rawdb.BloomBitsIndexPrefix)) - - return NewChainIndexer(db, table, backend, size, confirms, bloomThrottling, "bloombits") -} - -// Reset implements core.ChainIndexerBackend, starting a new bloombits index -// section. -func (b *BloomIndexer) Reset(ctx context.Context, section uint64, lastSectionHead common.Hash) error { - gen, err := bloombits.NewGenerator(uint(b.size)) - b.gen, b.section, b.head = gen, section, common.Hash{} - return err -} - -// Process implements core.ChainIndexerBackend, adding a new header's bloom into -// the index. -func (b *BloomIndexer) Process(ctx context.Context, header *types.Header) error { - b.gen.AddBloom(uint(header.Number.Uint64()-b.section*b.size), header.Bloom) - b.head = header.Hash() - return nil -} - -// Commit implements core.ChainIndexerBackend, finalizing the bloom section and -// writing it out into the database. -func (b *BloomIndexer) Commit() error { - batch := b.db.NewBatchWithSize((int(b.size) / 8) * types.BloomBitLength) - for i := 0; i < types.BloomBitLength; i++ { - bits, err := b.gen.Bitset(uint(i)) - if err != nil { - return err - } - rawdb.WriteBloomBits(batch, uint(i), b.section, b.head, bitutil.CompressBytes(bits)) - } - return batch.Write() -} - -// Prune returns an empty error since we don't support pruning here. 
-func (b *BloomIndexer) Prune(threshold uint64) error { - return nil -} diff --git a/core/bloombits/doc.go b/core/bloombits/doc.go deleted file mode 100644 index 3d159e74f775..000000000000 --- a/core/bloombits/doc.go +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2017 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . - -// Package bloombits implements bloom filtering on batches of data. -package bloombits diff --git a/core/bloombits/generator.go b/core/bloombits/generator.go deleted file mode 100644 index 646151db0bfd..000000000000 --- a/core/bloombits/generator.go +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2017 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. 
-// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . - -package bloombits - -import ( - "errors" - - "github.com/ethereum/go-ethereum/core/types" -) - -var ( - // errSectionOutOfBounds is returned if the user tried to add more bloom filters - // to the batch than available space, or if tries to retrieve above the capacity. - errSectionOutOfBounds = errors.New("section out of bounds") - - // errBloomBitOutOfBounds is returned if the user tried to retrieve specified - // bit bloom above the capacity. - errBloomBitOutOfBounds = errors.New("bloom bit out of bounds") -) - -// Generator takes a number of bloom filters and generates the rotated bloom bits -// to be used for batched filtering. -type Generator struct { - blooms [types.BloomBitLength][]byte // Rotated blooms for per-bit matching - sections uint // Number of sections to batch together - nextSec uint // Next section to set when adding a bloom -} - -// NewGenerator creates a rotated bloom generator that can iteratively fill a -// batched bloom filter's bits. -func NewGenerator(sections uint) (*Generator, error) { - if sections%8 != 0 { - return nil, errors.New("section count not multiple of 8") - } - b := &Generator{sections: sections} - for i := 0; i < types.BloomBitLength; i++ { - b.blooms[i] = make([]byte, sections/8) - } - return b, nil -} - -// AddBloom takes a single bloom filter and sets the corresponding bit column -// in memory accordingly. 
-func (b *Generator) AddBloom(index uint, bloom types.Bloom) error { - // Make sure we're not adding more bloom filters than our capacity - if b.nextSec >= b.sections { - return errSectionOutOfBounds - } - if b.nextSec != index { - return errors.New("bloom filter with unexpected index") - } - // Rotate the bloom and insert into our collection - byteIndex := b.nextSec / 8 - bitIndex := byte(7 - b.nextSec%8) - for byt := 0; byt < types.BloomByteLength; byt++ { - bloomByte := bloom[types.BloomByteLength-1-byt] - if bloomByte == 0 { - continue - } - base := 8 * byt - b.blooms[base+7][byteIndex] |= ((bloomByte >> 7) & 1) << bitIndex - b.blooms[base+6][byteIndex] |= ((bloomByte >> 6) & 1) << bitIndex - b.blooms[base+5][byteIndex] |= ((bloomByte >> 5) & 1) << bitIndex - b.blooms[base+4][byteIndex] |= ((bloomByte >> 4) & 1) << bitIndex - b.blooms[base+3][byteIndex] |= ((bloomByte >> 3) & 1) << bitIndex - b.blooms[base+2][byteIndex] |= ((bloomByte >> 2) & 1) << bitIndex - b.blooms[base+1][byteIndex] |= ((bloomByte >> 1) & 1) << bitIndex - b.blooms[base][byteIndex] |= (bloomByte & 1) << bitIndex - } - b.nextSec++ - return nil -} - -// Bitset returns the bit vector belonging to the given bit index after all -// blooms have been added. -func (b *Generator) Bitset(idx uint) ([]byte, error) { - if b.nextSec != b.sections { - return nil, errors.New("bloom not fully generated yet") - } - if idx >= types.BloomBitLength { - return nil, errBloomBitOutOfBounds - } - return b.blooms[idx], nil -} diff --git a/core/bloombits/generator_test.go b/core/bloombits/generator_test.go deleted file mode 100644 index ac1aee0b2524..000000000000 --- a/core/bloombits/generator_test.go +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright 2017 The go-ethereum Authors -// This file is part of the go-ethereum library. 
-// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . - -package bloombits - -import ( - "bytes" - crand "crypto/rand" - "math/rand" - "testing" - - "github.com/ethereum/go-ethereum/core/types" -) - -// Tests that batched bloom bits are correctly rotated from the input bloom -// filters. -func TestGenerator(t *testing.T) { - // Generate the input and the rotated output - var input, output [types.BloomBitLength][types.BloomByteLength]byte - - for i := 0; i < types.BloomBitLength; i++ { - for j := 0; j < types.BloomBitLength; j++ { - bit := byte(rand.Int() % 2) - - input[i][j/8] |= bit << byte(7-j%8) - output[types.BloomBitLength-1-j][i/8] |= bit << byte(7-i%8) - } - } - // Crunch the input through the generator and verify the result - gen, err := NewGenerator(types.BloomBitLength) - if err != nil { - t.Fatalf("failed to create bloombit generator: %v", err) - } - for i, bloom := range input { - if err := gen.AddBloom(uint(i), bloom); err != nil { - t.Fatalf("bloom %d: failed to add: %v", i, err) - } - } - for i, want := range output { - have, err := gen.Bitset(uint(i)) - if err != nil { - t.Fatalf("output %d: failed to retrieve bits: %v", i, err) - } - if !bytes.Equal(have, want[:]) { - t.Errorf("output %d: bit vector mismatch have %x, want %x", i, have, want) - } - } -} - -func BenchmarkGenerator(b *testing.B) { - var input 
[types.BloomBitLength][types.BloomByteLength]byte - b.Run("empty", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for i := 0; i < b.N; i++ { - // Crunch the input through the generator and verify the result - gen, err := NewGenerator(types.BloomBitLength) - if err != nil { - b.Fatalf("failed to create bloombit generator: %v", err) - } - for j, bloom := range &input { - if err := gen.AddBloom(uint(j), bloom); err != nil { - b.Fatalf("bloom %d: failed to add: %v", i, err) - } - } - } - }) - for i := 0; i < types.BloomBitLength; i++ { - crand.Read(input[i][:]) - } - b.Run("random", func(b *testing.B) { - b.ReportAllocs() - b.ResetTimer() - for i := 0; i < b.N; i++ { - // Crunch the input through the generator and verify the result - gen, err := NewGenerator(types.BloomBitLength) - if err != nil { - b.Fatalf("failed to create bloombit generator: %v", err) - } - for j, bloom := range &input { - if err := gen.AddBloom(uint(j), bloom); err != nil { - b.Fatalf("bloom %d: failed to add: %v", i, err) - } - } - } - }) -} diff --git a/core/bloombits/matcher.go b/core/bloombits/matcher.go deleted file mode 100644 index 486581fe23d7..000000000000 --- a/core/bloombits/matcher.go +++ /dev/null @@ -1,649 +0,0 @@ -// Copyright 2017 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. 
If not, see . - -package bloombits - -import ( - "bytes" - "context" - "errors" - "math" - "sort" - "sync" - "sync/atomic" - "time" - - "github.com/ethereum/go-ethereum/common/bitutil" - "github.com/ethereum/go-ethereum/crypto" -) - -// bloomIndexes represents the bit indexes inside the bloom filter that belong -// to some key. -type bloomIndexes [3]uint - -// calcBloomIndexes returns the bloom filter bit indexes belonging to the given key. -func calcBloomIndexes(b []byte) bloomIndexes { - b = crypto.Keccak256(b) - - var idxs bloomIndexes - for i := 0; i < len(idxs); i++ { - idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1]) - } - return idxs -} - -// partialMatches with a non-nil vector represents a section in which some sub- -// matchers have already found potential matches. Subsequent sub-matchers will -// binary AND their matches with this vector. If vector is nil, it represents a -// section to be processed by the first sub-matcher. -type partialMatches struct { - section uint64 - bitset []byte -} - -// Retrieval represents a request for retrieval task assignments for a given -// bit with the given number of fetch elements, or a response for such a request. -// It can also have the actual results set to be used as a delivery data struct. -// -// The context and error fields are used by the light client to terminate matching -// early if an error is encountered on some path of the pipeline. -type Retrieval struct { - Bit uint - Sections []uint64 - Bitsets [][]byte - - Context context.Context - Error error -} - -// Matcher is a pipelined system of schedulers and logic matchers which perform -// binary AND/OR operations on the bit-streams, creating a stream of potential -// blocks to inspect for data content. 
-type Matcher struct { - sectionSize uint64 // Size of the data batches to filter on - - filters [][]bloomIndexes // Filter the system is matching for - schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits - - retrievers chan chan uint // Retriever processes waiting for bit allocations - counters chan chan uint // Retriever processes waiting for task count reports - retrievals chan chan *Retrieval // Retriever processes waiting for task allocations - deliveries chan *Retrieval // Retriever processes waiting for task response deliveries - - running atomic.Bool // Atomic flag whether a session is live or not -} - -// NewMatcher creates a new pipeline for retrieving bloom bit streams and doing -// address and topic filtering on them. Setting a filter component to `nil` is -// allowed and will result in that filter rule being skipped (OR 0x11...1). -func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher { - // Create the matcher instance - m := &Matcher{ - sectionSize: sectionSize, - schedulers: make(map[uint]*scheduler), - retrievers: make(chan chan uint), - counters: make(chan chan uint), - retrievals: make(chan chan *Retrieval), - deliveries: make(chan *Retrieval), - } - // Calculate the bloom bit indexes for the groups we're interested in - m.filters = nil - - for _, filter := range filters { - // Gather the bit indexes of the filter rule, special casing the nil filter - if len(filter) == 0 { - continue - } - bloomBits := make([]bloomIndexes, len(filter)) - for i, clause := range filter { - if clause == nil { - bloomBits = nil - break - } - bloomBits[i] = calcBloomIndexes(clause) - } - // Accumulate the filter rules if no nil rule was within - if bloomBits != nil { - m.filters = append(m.filters, bloomBits) - } - } - // For every bit, create a scheduler to load/download the bit vectors - for _, bloomIndexLists := range m.filters { - for _, bloomIndexList := range bloomIndexLists { - for _, bloomIndex := range bloomIndexList { - 
m.addScheduler(bloomIndex) - } - } - } - return m -} - -// addScheduler adds a bit stream retrieval scheduler for the given bit index if -// it has not existed before. If the bit is already selected for filtering, the -// existing scheduler can be used. -func (m *Matcher) addScheduler(idx uint) { - if _, ok := m.schedulers[idx]; ok { - return - } - m.schedulers[idx] = newScheduler(idx) -} - -// Start starts the matching process and returns a stream of bloom matches in -// a given range of blocks. If there are no more matches in the range, the result -// channel is closed. -func (m *Matcher) Start(ctx context.Context, begin, end uint64, results chan uint64) (*MatcherSession, error) { - // Make sure we're not creating concurrent sessions - if m.running.Swap(true) { - return nil, errors.New("matcher already running") - } - defer m.running.Store(false) - - // Initiate a new matching round - session := &MatcherSession{ - matcher: m, - quit: make(chan struct{}), - ctx: ctx, - } - for _, scheduler := range m.schedulers { - scheduler.reset() - } - sink := m.run(begin, end, cap(results), session) - - // Read the output from the result sink and deliver to the user - session.pend.Add(1) - go func() { - defer session.pend.Done() - defer close(results) - - for { - select { - case <-session.quit: - return - - case res, ok := <-sink: - // New match result found - if !ok { - return - } - // Calculate the first and last blocks of the section - sectionStart := res.section * m.sectionSize - - first := sectionStart - if begin > first { - first = begin - } - last := sectionStart + m.sectionSize - 1 - if end < last { - last = end - } - // Iterate over all the blocks in the section and return the matching ones - for i := first; i <= last; i++ { - // Skip the entire byte if no matches are found inside (and we're processing an entire byte!) 
- next := res.bitset[(i-sectionStart)/8] - if next == 0 { - if i%8 == 0 { - i += 7 - } - continue - } - // Some bit it set, do the actual submatching - if bit := 7 - i%8; next&(1<= req.section }) - requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...) - - // If it's a new bit and we have waiting fetchers, allocate to them - if len(queue) == 0 { - assign(req.bit) - } - - case fetcher := <-retrievers: - // New retriever arrived, find the lowest section-ed bit to assign - bit, best := uint(0), uint64(math.MaxUint64) - for idx := range unallocs { - if requests[idx][0] < best { - bit, best = idx, requests[idx][0] - } - } - // Stop tracking this bit (and alloc notifications if no more work is available) - delete(unallocs, bit) - if len(unallocs) == 0 { - retrievers = nil - } - allocs++ - fetcher <- bit - - case fetcher := <-m.counters: - // New task count request arrives, return number of items - fetcher <- uint(len(requests[<-fetcher])) - - case fetcher := <-m.retrievals: - // New fetcher waiting for tasks to retrieve, assign - task := <-fetcher - if want := len(task.Sections); want >= len(requests[task.Bit]) { - task.Sections = requests[task.Bit] - delete(requests, task.Bit) - } else { - task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...) - requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...) 
- } - fetcher <- task - - // If anything was left unallocated, try to assign to someone else - if len(requests[task.Bit]) > 0 { - assign(task.Bit) - } - - case result := <-m.deliveries: - // New retrieval task response from fetcher, split out missing sections and - // deliver complete ones - var ( - sections = make([]uint64, 0, len(result.Sections)) - bitsets = make([][]byte, 0, len(result.Bitsets)) - missing = make([]uint64, 0, len(result.Sections)) - ) - for i, bitset := range result.Bitsets { - if len(bitset) == 0 { - missing = append(missing, result.Sections[i]) - continue - } - sections = append(sections, result.Sections[i]) - bitsets = append(bitsets, bitset) - } - m.schedulers[result.Bit].deliver(sections, bitsets) - allocs-- - - // Reschedule missing sections and allocate bit if newly available - if len(missing) > 0 { - queue := requests[result.Bit] - for _, section := range missing { - index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section }) - queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...) - } - requests[result.Bit] = queue - - if len(queue) == len(missing) { - assign(result.Bit) - } - } - - // End the session when all pending deliveries have arrived. - if shutdown == nil && allocs == 0 { - return - } - } - } -} - -// MatcherSession is returned by a started matcher to be used as a terminator -// for the actively running matching operation. -type MatcherSession struct { - matcher *Matcher - - closer sync.Once // Sync object to ensure we only ever close once - quit chan struct{} // Quit channel to request pipeline termination - - ctx context.Context // Context used by the light client to abort filtering - err error // Global error to track retrieval failures deep in the chain - errLock sync.Mutex - - pend sync.WaitGroup -} - -// Close stops the matching process and waits for all subprocesses to terminate -// before returning. 
The timeout may be used for graceful shutdown, allowing the -// currently running retrievals to complete before this time. -func (s *MatcherSession) Close() { - s.closer.Do(func() { - // Signal termination and wait for all goroutines to tear down - close(s.quit) - s.pend.Wait() - }) -} - -// Error returns any failure encountered during the matching session. -func (s *MatcherSession) Error() error { - s.errLock.Lock() - defer s.errLock.Unlock() - - return s.err -} - -// allocateRetrieval assigns a bloom bit index to a client process that can either -// immediately request and fetch the section contents assigned to this bit or wait -// a little while for more sections to be requested. -func (s *MatcherSession) allocateRetrieval() (uint, bool) { - fetcher := make(chan uint) - - select { - case <-s.quit: - return 0, false - case s.matcher.retrievers <- fetcher: - bit, ok := <-fetcher - return bit, ok - } -} - -// pendingSections returns the number of pending section retrievals belonging to -// the given bloom bit index. -func (s *MatcherSession) pendingSections(bit uint) int { - fetcher := make(chan uint) - - select { - case <-s.quit: - return 0 - case s.matcher.counters <- fetcher: - fetcher <- bit - return int(<-fetcher) - } -} - -// allocateSections assigns all or part of an already allocated bit-task queue -// to the requesting process. -func (s *MatcherSession) allocateSections(bit uint, count int) []uint64 { - fetcher := make(chan *Retrieval) - - select { - case <-s.quit: - return nil - case s.matcher.retrievals <- fetcher: - task := &Retrieval{ - Bit: bit, - Sections: make([]uint64, count), - } - fetcher <- task - return (<-fetcher).Sections - } -} - -// deliverSections delivers a batch of section bit-vectors for a specific bloom -// bit index to be injected into the processing pipeline. 
-func (s *MatcherSession) deliverSections(bit uint, sections []uint64, bitsets [][]byte) { - s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets} -} - -// Multiplex polls the matcher session for retrieval tasks and multiplexes it into -// the requested retrieval queue to be serviced together with other sessions. -// -// This method will block for the lifetime of the session. Even after termination -// of the session, any request in-flight need to be responded to! Empty responses -// are fine though in that case. -func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) { - waitTimer := time.NewTimer(wait) - defer waitTimer.Stop() - - for { - // Allocate a new bloom bit index to retrieve data for, stopping when done - bit, ok := s.allocateRetrieval() - if !ok { - return - } - // Bit allocated, throttle a bit if we're below our batch limit - if s.pendingSections(bit) < batch { - waitTimer.Reset(wait) - select { - case <-s.quit: - // Session terminating, we can't meaningfully service, abort - s.allocateSections(bit, 0) - s.deliverSections(bit, []uint64{}, [][]byte{}) - return - - case <-waitTimer.C: - // Throttling up, fetch whatever is available - } - } - // Allocate as much as we can handle and request servicing - sections := s.allocateSections(bit, batch) - request := make(chan *Retrieval) - - select { - case <-s.quit: - // Session terminating, we can't meaningfully service, abort - s.deliverSections(bit, sections, make([][]byte, len(sections))) - return - - case mux <- request: - // Retrieval accepted, something must arrive before we're aborting - request <- &Retrieval{Bit: bit, Sections: sections, Context: s.ctx} - - result := <-request - - // Deliver a result before s.Close() to avoid a deadlock - s.deliverSections(result.Bit, result.Sections, result.Bitsets) - - if result.Error != nil { - s.errLock.Lock() - s.err = result.Error - s.errLock.Unlock() - s.Close() - } - } - } -} diff --git 
a/core/bloombits/matcher_test.go b/core/bloombits/matcher_test.go deleted file mode 100644 index 7f3d5f279ca3..000000000000 --- a/core/bloombits/matcher_test.go +++ /dev/null @@ -1,292 +0,0 @@ -// Copyright 2017 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . - -package bloombits - -import ( - "context" - "math/rand" - "sync/atomic" - "testing" - "time" - - "github.com/ethereum/go-ethereum/common" -) - -const testSectionSize = 4096 - -// Tests that wildcard filter rules (nil) can be specified and are handled well. 
-func TestMatcherWildcards(t *testing.T) { - t.Parallel() - matcher := NewMatcher(testSectionSize, [][][]byte{ - {common.Address{}.Bytes(), common.Address{0x01}.Bytes()}, // Default address is not a wildcard - {common.Hash{}.Bytes(), common.Hash{0x01}.Bytes()}, // Default hash is not a wildcard - {common.Hash{0x01}.Bytes()}, // Plain rule, sanity check - {common.Hash{0x01}.Bytes(), nil}, // Wildcard suffix, drop rule - {nil, common.Hash{0x01}.Bytes()}, // Wildcard prefix, drop rule - {nil, nil}, // Wildcard combo, drop rule - {}, // Inited wildcard rule, drop rule - nil, // Proper wildcard rule, drop rule - }) - if len(matcher.filters) != 3 { - t.Fatalf("filter system size mismatch: have %d, want %d", len(matcher.filters), 3) - } - if len(matcher.filters[0]) != 2 { - t.Fatalf("address clause size mismatch: have %d, want %d", len(matcher.filters[0]), 2) - } - if len(matcher.filters[1]) != 2 { - t.Fatalf("combo topic clause size mismatch: have %d, want %d", len(matcher.filters[1]), 2) - } - if len(matcher.filters[2]) != 1 { - t.Fatalf("singletone topic clause size mismatch: have %d, want %d", len(matcher.filters[2]), 1) - } -} - -// Tests the matcher pipeline on a single continuous workflow without interrupts. -func TestMatcherContinuous(t *testing.T) { - t.Parallel() - testMatcherDiffBatches(t, [][]bloomIndexes{{{10, 20, 30}}}, 0, 100000, false, 75) - testMatcherDiffBatches(t, [][]bloomIndexes{{{32, 3125, 100}}, {{40, 50, 10}}}, 0, 100000, false, 81) - testMatcherDiffBatches(t, [][]bloomIndexes{{{4, 8, 11}, {7, 8, 17}}, {{9, 9, 12}, {15, 20, 13}}, {{18, 15, 15}, {12, 10, 4}}}, 0, 10000, false, 36) -} - -// Tests the matcher pipeline on a constantly interrupted and resumed work pattern -// with the aim of ensuring data items are requested only once. 
-func TestMatcherIntermittent(t *testing.T) { - t.Parallel() - testMatcherDiffBatches(t, [][]bloomIndexes{{{10, 20, 30}}}, 0, 100000, true, 75) - testMatcherDiffBatches(t, [][]bloomIndexes{{{32, 3125, 100}}, {{40, 50, 10}}}, 0, 100000, true, 81) - testMatcherDiffBatches(t, [][]bloomIndexes{{{4, 8, 11}, {7, 8, 17}}, {{9, 9, 12}, {15, 20, 13}}, {{18, 15, 15}, {12, 10, 4}}}, 0, 10000, true, 36) -} - -// Tests the matcher pipeline on random input to hopefully catch anomalies. -func TestMatcherRandom(t *testing.T) { - t.Parallel() - for i := 0; i < 10; i++ { - testMatcherBothModes(t, makeRandomIndexes([]int{1}, 50), 0, 10000, 0) - testMatcherBothModes(t, makeRandomIndexes([]int{3}, 50), 0, 10000, 0) - testMatcherBothModes(t, makeRandomIndexes([]int{2, 2, 2}, 20), 0, 10000, 0) - testMatcherBothModes(t, makeRandomIndexes([]int{5, 5, 5}, 50), 0, 10000, 0) - testMatcherBothModes(t, makeRandomIndexes([]int{4, 4, 4}, 20), 0, 10000, 0) - } -} - -// Tests that the matcher can properly find matches if the starting block is -// shifted from a multiple of 8. This is needed to cover an optimisation with -// bitset matching https://github.com/ethereum/go-ethereum/issues/15309. -func TestMatcherShifted(t *testing.T) { - t.Parallel() - // Block 0 always matches in the tests, skip ahead of first 8 blocks with the - // start to get a potential zero byte in the matcher bitset. - - // To keep the second bitset byte zero, the filter must only match for the first - // time in block 16, so doing an all-16 bit filter should suffice. - - // To keep the starting block non divisible by 8, block number 9 is the first - // that would introduce a shift and not match block 0. - testMatcherBothModes(t, [][]bloomIndexes{{{16, 16, 16}}}, 9, 64, 0) -} - -// Tests that matching on everything doesn't crash (special case internally). 
-func TestWildcardMatcher(t *testing.T) { - t.Parallel() - testMatcherBothModes(t, nil, 0, 10000, 0) -} - -// makeRandomIndexes generates a random filter system, composed of multiple filter -// criteria, each having one bloom list component for the address and arbitrarily -// many topic bloom list components. -func makeRandomIndexes(lengths []int, max int) [][]bloomIndexes { - res := make([][]bloomIndexes, len(lengths)) - for i, topics := range lengths { - res[i] = make([]bloomIndexes, topics) - for j := 0; j < topics; j++ { - for k := 0; k < len(res[i][j]); k++ { - res[i][j][k] = uint(rand.Intn(max-1) + 2) - } - } - } - return res -} - -// testMatcherDiffBatches runs the given matches test in single-delivery and also -// in batches delivery mode, verifying that all kinds of deliveries are handled -// correctly within. -func testMatcherDiffBatches(t *testing.T, filter [][]bloomIndexes, start, blocks uint64, intermittent bool, retrievals uint32) { - singleton := testMatcher(t, filter, start, blocks, intermittent, retrievals, 1) - batched := testMatcher(t, filter, start, blocks, intermittent, retrievals, 16) - - if singleton != batched { - t.Errorf("filter = %v blocks = %v intermittent = %v: request count mismatch, %v in singleton vs. %v in batched mode", filter, blocks, intermittent, singleton, batched) - } -} - -// testMatcherBothModes runs the given matcher test in both continuous as well as -// in intermittent mode, verifying that the request counts match each other. -func testMatcherBothModes(t *testing.T, filter [][]bloomIndexes, start, blocks uint64, retrievals uint32) { - continuous := testMatcher(t, filter, start, blocks, false, retrievals, 16) - intermittent := testMatcher(t, filter, start, blocks, true, retrievals, 16) - - if continuous != intermittent { - t.Errorf("filter = %v blocks = %v: request count mismatch, %v in continuous vs. 
%v in intermittent mode", filter, blocks, continuous, intermittent) - } -} - -// testMatcher is a generic tester to run the given matcher test and return the -// number of requests made for cross validation between different modes. -func testMatcher(t *testing.T, filter [][]bloomIndexes, start, blocks uint64, intermittent bool, retrievals uint32, maxReqCount int) uint32 { - // Create a new matcher an simulate our explicit random bitsets - matcher := NewMatcher(testSectionSize, nil) - matcher.filters = filter - - for _, rule := range filter { - for _, topic := range rule { - for _, bit := range topic { - matcher.addScheduler(bit) - } - } - } - // Track the number of retrieval requests made - var requested atomic.Uint32 - - // Start the matching session for the filter and the retriever goroutines - quit := make(chan struct{}) - matches := make(chan uint64, 16) - - session, err := matcher.Start(context.Background(), start, blocks-1, matches) - if err != nil { - t.Fatalf("failed to stat matcher session: %v", err) - } - startRetrievers(session, quit, &requested, maxReqCount) - - // Iterate over all the blocks and verify that the pipeline produces the correct matches - for i := start; i < blocks; i++ { - if expMatch3(filter, i) { - match, ok := <-matches - if !ok { - t.Errorf("filter = %v blocks = %v intermittent = %v: expected #%v, results channel closed", filter, blocks, intermittent, i) - return 0 - } - if match != i { - t.Errorf("filter = %v blocks = %v intermittent = %v: expected #%v, got #%v", filter, blocks, intermittent, i, match) - } - // If we're testing intermittent mode, abort and restart the pipeline - if intermittent { - session.Close() - close(quit) - - quit = make(chan struct{}) - matches = make(chan uint64, 16) - - session, err = matcher.Start(context.Background(), i+1, blocks-1, matches) - if err != nil { - t.Fatalf("failed to stat matcher session: %v", err) - } - startRetrievers(session, quit, &requested, maxReqCount) - } - } - } - // Ensure the result 
channel is torn down after the last block - match, ok := <-matches - if ok { - t.Errorf("filter = %v blocks = %v intermittent = %v: expected closed channel, got #%v", filter, blocks, intermittent, match) - } - // Clean up the session and ensure we match the expected retrieval count - session.Close() - close(quit) - - if retrievals != 0 && requested.Load() != retrievals { - t.Errorf("filter = %v blocks = %v intermittent = %v: request count mismatch, have #%v, want #%v", filter, blocks, intermittent, requested.Load(), retrievals) - } - return requested.Load() -} - -// startRetrievers starts a batch of goroutines listening for section requests -// and serving them. -func startRetrievers(session *MatcherSession, quit chan struct{}, retrievals *atomic.Uint32, batch int) { - requests := make(chan chan *Retrieval) - - for i := 0; i < 10; i++ { - // Start a multiplexer to test multiple threaded execution - go session.Multiplex(batch, 100*time.Microsecond, requests) - - // Start a services to match the above multiplexer - go func() { - for { - // Wait for a service request or a shutdown - select { - case <-quit: - return - - case request := <-requests: - task := <-request - - task.Bitsets = make([][]byte, len(task.Sections)) - for i, section := range task.Sections { - if rand.Int()%4 != 0 { // Handle occasional missing deliveries - task.Bitsets[i] = generateBitset(task.Bit, section) - retrievals.Add(1) - } - } - request <- task - } - } - }() - } -} - -// generateBitset generates the rotated bitset for the given bloom bit and section -// numbers. 
-func generateBitset(bit uint, section uint64) []byte { - bitset := make([]byte, testSectionSize/8) - for i := 0; i < len(bitset); i++ { - for b := 0; b < 8; b++ { - blockIdx := section*testSectionSize + uint64(i*8+b) - bitset[i] += bitset[i] - if (blockIdx % uint64(bit)) == 0 { - bitset[i]++ - } - } - } - return bitset -} - -func expMatch1(filter bloomIndexes, i uint64) bool { - for _, ii := range filter { - if (i % uint64(ii)) != 0 { - return false - } - } - return true -} - -func expMatch2(filter []bloomIndexes, i uint64) bool { - for _, ii := range filter { - if expMatch1(ii, i) { - return true - } - } - return false -} - -func expMatch3(filter [][]bloomIndexes, i uint64) bool { - for _, ii := range filter { - if !expMatch2(ii, i) { - return false - } - } - return true -} diff --git a/core/bloombits/scheduler.go b/core/bloombits/scheduler.go deleted file mode 100644 index a523bc55ab49..000000000000 --- a/core/bloombits/scheduler.go +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright 2017 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . - -package bloombits - -import ( - "sync" -) - -// request represents a bloom retrieval task to prioritize and pull from the local -// database or remotely from the network. 
-type request struct { - section uint64 // Section index to retrieve the bit-vector from - bit uint // Bit index within the section to retrieve the vector of -} - -// response represents the state of a requested bit-vector through a scheduler. -type response struct { - cached []byte // Cached bits to dedup multiple requests - done chan struct{} // Channel to allow waiting for completion -} - -// scheduler handles the scheduling of bloom-filter retrieval operations for -// entire section-batches belonging to a single bloom bit. Beside scheduling the -// retrieval operations, this struct also deduplicates the requests and caches -// the results to minimize network/database overhead even in complex filtering -// scenarios. -type scheduler struct { - bit uint // Index of the bit in the bloom filter this scheduler is responsible for - responses map[uint64]*response // Currently pending retrieval requests or already cached responses - lock sync.Mutex // Lock protecting the responses from concurrent access -} - -// newScheduler creates a new bloom-filter retrieval scheduler for a specific -// bit index. -func newScheduler(idx uint) *scheduler { - return &scheduler{ - bit: idx, - responses: make(map[uint64]*response), - } -} - -// run creates a retrieval pipeline, receiving section indexes from sections and -// returning the results in the same order through the done channel. Concurrent -// runs of the same scheduler are allowed, leading to retrieval task deduplication. -func (s *scheduler) run(sections chan uint64, dist chan *request, done chan []byte, quit chan struct{}, wg *sync.WaitGroup) { - // Create a forwarder channel between requests and responses of the same size as - // the distribution channel (since that will block the pipeline anyway). 
- pend := make(chan uint64, cap(dist)) - - // Start the pipeline schedulers to forward between user -> distributor -> user - wg.Add(2) - go s.scheduleRequests(sections, dist, pend, quit, wg) - go s.scheduleDeliveries(pend, done, quit, wg) -} - -// reset cleans up any leftovers from previous runs. This is required before a -// restart to ensure the no previously requested but never delivered state will -// cause a lockup. -func (s *scheduler) reset() { - s.lock.Lock() - defer s.lock.Unlock() - - for section, res := range s.responses { - if res.cached == nil { - delete(s.responses, section) - } - } -} - -// scheduleRequests reads section retrieval requests from the input channel, -// deduplicates the stream and pushes unique retrieval tasks into the distribution -// channel for a database or network layer to honour. -func (s *scheduler) scheduleRequests(reqs chan uint64, dist chan *request, pend chan uint64, quit chan struct{}, wg *sync.WaitGroup) { - // Clean up the goroutine and pipeline when done - defer wg.Done() - defer close(pend) - - // Keep reading and scheduling section requests - for { - select { - case <-quit: - return - - case section, ok := <-reqs: - // New section retrieval requested - if !ok { - return - } - // Deduplicate retrieval requests - unique := false - - s.lock.Lock() - if s.responses[section] == nil { - s.responses[section] = &response{ - done: make(chan struct{}), - } - unique = true - } - s.lock.Unlock() - - // Schedule the section for retrieval and notify the deliverer to expect this section - if unique { - select { - case <-quit: - return - case dist <- &request{bit: s.bit, section: section}: - } - } - select { - case <-quit: - return - case pend <- section: - } - } - } -} - -// scheduleDeliveries reads section acceptance notifications and waits for them -// to be delivered, pushing them into the output data buffer. 
-func (s *scheduler) scheduleDeliveries(pend chan uint64, done chan []byte, quit chan struct{}, wg *sync.WaitGroup) { - // Clean up the goroutine and pipeline when done - defer wg.Done() - defer close(done) - - // Keep reading notifications and scheduling deliveries - for { - select { - case <-quit: - return - - case idx, ok := <-pend: - // New section retrieval pending - if !ok { - return - } - // Wait until the request is honoured - s.lock.Lock() - res := s.responses[idx] - s.lock.Unlock() - - select { - case <-quit: - return - case <-res.done: - } - // Deliver the result - select { - case <-quit: - return - case done <- res.cached: - } - } - } -} - -// deliver is called by the request distributor when a reply to a request arrives. -func (s *scheduler) deliver(sections []uint64, data [][]byte) { - s.lock.Lock() - defer s.lock.Unlock() - - for i, section := range sections { - if res := s.responses[section]; res != nil && res.cached == nil { // Avoid non-requests and double deliveries - res.cached = data[i] - close(res.done) - } - } -} diff --git a/core/bloombits/scheduler_test.go b/core/bloombits/scheduler_test.go deleted file mode 100644 index dcaaa915258a..000000000000 --- a/core/bloombits/scheduler_test.go +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2017 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. 
-// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . - -package bloombits - -import ( - "bytes" - "math/big" - "sync" - "sync/atomic" - "testing" -) - -// Tests that the scheduler can deduplicate and forward retrieval requests to -// underlying fetchers and serve responses back, irrelevant of the concurrency -// of the requesting clients or serving data fetchers. -func TestSchedulerSingleClientSingleFetcher(t *testing.T) { testScheduler(t, 1, 1, 5000) } -func TestSchedulerSingleClientMultiFetcher(t *testing.T) { testScheduler(t, 1, 10, 5000) } -func TestSchedulerMultiClientSingleFetcher(t *testing.T) { testScheduler(t, 10, 1, 5000) } -func TestSchedulerMultiClientMultiFetcher(t *testing.T) { testScheduler(t, 10, 10, 5000) } - -func testScheduler(t *testing.T, clients int, fetchers int, requests int) { - t.Parallel() - f := newScheduler(0) - - // Create a batch of handler goroutines that respond to bloom bit requests and - // deliver them to the scheduler. 
- var fetchPend sync.WaitGroup - fetchPend.Add(fetchers) - defer fetchPend.Wait() - - fetch := make(chan *request, 16) - defer close(fetch) - - var delivered atomic.Uint32 - for i := 0; i < fetchers; i++ { - go func() { - defer fetchPend.Done() - - for req := range fetch { - delivered.Add(1) - - f.deliver([]uint64{ - req.section + uint64(requests), // Non-requested data (ensure it doesn't go out of bounds) - req.section, // Requested data - req.section, // Duplicated data (ensure it doesn't double close anything) - }, [][]byte{ - {}, - new(big.Int).SetUint64(req.section).Bytes(), - new(big.Int).SetUint64(req.section).Bytes(), - }) - } - }() - } - // Start a batch of goroutines to concurrently run scheduling tasks - quit := make(chan struct{}) - - var pend sync.WaitGroup - pend.Add(clients) - - for i := 0; i < clients; i++ { - go func() { - defer pend.Done() - - in := make(chan uint64, 16) - out := make(chan []byte, 16) - - f.run(in, fetch, out, quit, &pend) - - go func() { - for j := 0; j < requests; j++ { - in <- uint64(j) - } - close(in) - }() - b := new(big.Int) - for j := 0; j < requests; j++ { - bits := <-out - if want := b.SetUint64(uint64(j)).Bytes(); !bytes.Equal(bits, want) { - t.Errorf("vector %d: delivered content mismatch: have %x, want %x", j, bits, want) - } - } - }() - } - pend.Wait() - - if have := delivered.Load(); int(have) != requests { - t.Errorf("request count mismatch: have %v, want %v", have, requests) - } -} diff --git a/core/chain_indexer.go b/core/chain_indexer.go deleted file mode 100644 index f5fce7258831..000000000000 --- a/core/chain_indexer.go +++ /dev/null @@ -1,523 +0,0 @@ -// Copyright 2017 The go-ethereum Authors -// This file is part of the go-ethereum library. 
-// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . - -package core - -import ( - "context" - "encoding/binary" - "errors" - "fmt" - "sync" - "sync/atomic" - "time" - - "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/core/rawdb" - "github.com/ethereum/go-ethereum/core/types" - "github.com/ethereum/go-ethereum/ethdb" - "github.com/ethereum/go-ethereum/event" - "github.com/ethereum/go-ethereum/log" -) - -// ChainIndexerBackend defines the methods needed to process chain segments in -// the background and write the segment results into the database. These can be -// used to create filter blooms or CHTs. -type ChainIndexerBackend interface { - // Reset initiates the processing of a new chain segment, potentially terminating - // any partially completed operations (in case of a reorg). - Reset(ctx context.Context, section uint64, prevHead common.Hash) error - - // Process crunches through the next header in the chain segment. The caller - // will ensure a sequential order of headers. - Process(ctx context.Context, header *types.Header) error - - // Commit finalizes the section metadata and stores it into the database. - Commit() error - - // Prune deletes the chain index older than the given threshold. 
- Prune(threshold uint64) error -} - -// ChainIndexerChain interface is used for connecting the indexer to a blockchain -type ChainIndexerChain interface { - // CurrentHeader retrieves the latest locally known header. - CurrentHeader() *types.Header - - // SubscribeChainHeadEvent subscribes to new head header notifications. - SubscribeChainHeadEvent(ch chan<- ChainHeadEvent) event.Subscription -} - -// ChainIndexer does a post-processing job for equally sized sections of the -// canonical chain (like BlooomBits and CHT structures). A ChainIndexer is -// connected to the blockchain through the event system by starting a -// ChainHeadEventLoop in a goroutine. -// -// Further child ChainIndexers can be added which use the output of the parent -// section indexer. These child indexers receive new head notifications only -// after an entire section has been finished or in case of rollbacks that might -// affect already finished sections. -type ChainIndexer struct { - chainDb ethdb.Database // Chain database to index the data from - indexDb ethdb.Database // Prefixed table-view of the db to write index metadata into - backend ChainIndexerBackend // Background processor generating the index data content - children []*ChainIndexer // Child indexers to cascade chain updates to - - active atomic.Bool // Flag whether the event loop was started - update chan struct{} // Notification channel that headers should be processed - quit chan chan error // Quit channel to tear down running goroutines - ctx context.Context - ctxCancel func() - - sectionSize uint64 // Number of blocks in a single chain segment to process - confirmsReq uint64 // Number of confirmations before processing a completed segment - - storedSections uint64 // Number of sections successfully indexed into the database - knownSections uint64 // Number of sections known to be complete (block wise) - cascadedHead uint64 // Block number of the last completed section cascaded to subindexers - - checkpointSections 
uint64 // Number of sections covered by the checkpoint - checkpointHead common.Hash // Section head belonging to the checkpoint - - throttling time.Duration // Disk throttling to prevent a heavy upgrade from hogging resources - - log log.Logger - lock sync.Mutex -} - -// NewChainIndexer creates a new chain indexer to do background processing on -// chain segments of a given size after certain number of confirmations passed. -// The throttling parameter might be used to prevent database thrashing. -func NewChainIndexer(chainDb ethdb.Database, indexDb ethdb.Database, backend ChainIndexerBackend, section, confirm uint64, throttling time.Duration, kind string) *ChainIndexer { - c := &ChainIndexer{ - chainDb: chainDb, - indexDb: indexDb, - backend: backend, - update: make(chan struct{}, 1), - quit: make(chan chan error), - sectionSize: section, - confirmsReq: confirm, - throttling: throttling, - log: log.New("type", kind), - } - // Initialize database dependent fields and start the updater - c.loadValidSections() - c.ctx, c.ctxCancel = context.WithCancel(context.Background()) - - go c.updateLoop() - - return c -} - -// AddCheckpoint adds a checkpoint. Sections are never processed and the chain -// is not expected to be available before this point. The indexer assumes that -// the backend has sufficient information available to process subsequent sections. -// -// Note: knownSections == 0 and storedSections == checkpointSections until -// syncing reaches the checkpoint -func (c *ChainIndexer) AddCheckpoint(section uint64, shead common.Hash) { - c.lock.Lock() - defer c.lock.Unlock() - - // Short circuit if the given checkpoint is below than local's. 
- if c.checkpointSections >= section+1 || section < c.storedSections { - return - } - c.checkpointSections = section + 1 - c.checkpointHead = shead - - c.setSectionHead(section, shead) - c.setValidSections(section + 1) -} - -// Start creates a goroutine to feed chain head events into the indexer for -// cascading background processing. Children do not need to be started, they -// are notified about new events by their parents. -func (c *ChainIndexer) Start(chain ChainIndexerChain) { - events := make(chan ChainHeadEvent, 10) - sub := chain.SubscribeChainHeadEvent(events) - - go c.eventLoop(chain.CurrentHeader(), events, sub) -} - -// Close tears down all goroutines belonging to the indexer and returns any error -// that might have occurred internally. -func (c *ChainIndexer) Close() error { - var errs []error - - c.ctxCancel() - - // Tear down the primary update loop - errc := make(chan error) - c.quit <- errc - if err := <-errc; err != nil { - errs = append(errs, err) - } - // If needed, tear down the secondary event loop - if c.active.Load() { - c.quit <- errc - if err := <-errc; err != nil { - errs = append(errs, err) - } - } - // Close all children - for _, child := range c.children { - if err := child.Close(); err != nil { - errs = append(errs, err) - } - } - // Return any failures - switch { - case len(errs) == 0: - return nil - - case len(errs) == 1: - return errs[0] - - default: - return fmt.Errorf("%v", errs) - } -} - -// eventLoop is a secondary - optional - event loop of the indexer which is only -// started for the outermost indexer to push chain head events into a processing -// queue. 
-func (c *ChainIndexer) eventLoop(currentHeader *types.Header, events chan ChainHeadEvent, sub event.Subscription) { - // Mark the chain indexer as active, requiring an additional teardown - c.active.Store(true) - - defer sub.Unsubscribe() - - // Fire the initial new head event to start any outstanding processing - c.newHead(currentHeader.Number.Uint64(), false) - - var ( - prevHeader = currentHeader - prevHash = currentHeader.Hash() - ) - for { - select { - case errc := <-c.quit: - // Chain indexer terminating, report no failure and abort - errc <- nil - return - - case ev, ok := <-events: - // Received a new event, ensure it's not nil (closing) and update - if !ok { - errc := <-c.quit - errc <- nil - return - } - header := ev.Block.Header() - if header.ParentHash != prevHash { - // Reorg to the common ancestor if needed (might not exist in light sync mode, skip reorg then) - // TODO(karalabe, zsfelfoldi): This seems a bit brittle, can we detect this case explicitly? - - if rawdb.ReadCanonicalHash(c.chainDb, prevHeader.Number.Uint64()) != prevHash { - if h := rawdb.FindCommonAncestor(c.chainDb, prevHeader, header); h != nil { - c.newHead(h.Number.Uint64(), true) - } - } - } - c.newHead(header.Number.Uint64(), false) - - prevHeader, prevHash = header, header.Hash() - } - } -} - -// newHead notifies the indexer about new chain heads and/or reorgs. 
-func (c *ChainIndexer) newHead(head uint64, reorg bool) { - c.lock.Lock() - defer c.lock.Unlock() - - // If a reorg happened, invalidate all sections until that point - if reorg { - // Revert the known section number to the reorg point - known := (head + 1) / c.sectionSize - stored := known - if known < c.checkpointSections { - known = 0 - } - if stored < c.checkpointSections { - stored = c.checkpointSections - } - if known < c.knownSections { - c.knownSections = known - } - // Revert the stored sections from the database to the reorg point - if stored < c.storedSections { - c.setValidSections(stored) - } - // Update the new head number to the finalized section end and notify children - head = known * c.sectionSize - - if head < c.cascadedHead { - c.cascadedHead = head - for _, child := range c.children { - child.newHead(c.cascadedHead, true) - } - } - return - } - // No reorg, calculate the number of newly known sections and update if high enough - var sections uint64 - if head >= c.confirmsReq { - sections = (head + 1 - c.confirmsReq) / c.sectionSize - if sections < c.checkpointSections { - sections = 0 - } - if sections > c.knownSections { - if c.knownSections < c.checkpointSections { - // syncing reached the checkpoint, verify section head - syncedHead := rawdb.ReadCanonicalHash(c.chainDb, c.checkpointSections*c.sectionSize-1) - if syncedHead != c.checkpointHead { - c.log.Error("Synced chain does not match checkpoint", "number", c.checkpointSections*c.sectionSize-1, "expected", c.checkpointHead, "synced", syncedHead) - return - } - } - c.knownSections = sections - - select { - case c.update <- struct{}{}: - default: - } - } - } -} - -// updateLoop is the main event loop of the indexer which pushes chain segments -// down into the processing backend. 
-func (c *ChainIndexer) updateLoop() { - var ( - updating bool - updated time.Time - ) - - for { - select { - case errc := <-c.quit: - // Chain indexer terminating, report no failure and abort - errc <- nil - return - - case <-c.update: - // Section headers completed (or rolled back), update the index - c.lock.Lock() - if c.knownSections > c.storedSections { - // Periodically print an upgrade log message to the user - if time.Since(updated) > 8*time.Second { - if c.knownSections > c.storedSections+1 { - updating = true - c.log.Info("Upgrading chain index", "percentage", c.storedSections*100/c.knownSections) - } - updated = time.Now() - } - // Cache the current section count and head to allow unlocking the mutex - c.verifyLastHead() - section := c.storedSections - var oldHead common.Hash - if section > 0 { - oldHead = c.SectionHead(section - 1) - } - // Process the newly defined section in the background - c.lock.Unlock() - newHead, err := c.processSection(section, oldHead) - if err != nil { - select { - case <-c.ctx.Done(): - <-c.quit <- nil - return - default: - } - c.log.Error("Section processing failed", "error", err) - } - c.lock.Lock() - - // If processing succeeded and no reorgs occurred, mark the section completed - if err == nil && (section == 0 || oldHead == c.SectionHead(section-1)) { - c.setSectionHead(section, newHead) - c.setValidSections(section + 1) - if c.storedSections == c.knownSections && updating { - updating = false - c.log.Info("Finished upgrading chain index") - } - c.cascadedHead = c.storedSections*c.sectionSize - 1 - for _, child := range c.children { - c.log.Trace("Cascading chain index update", "head", c.cascadedHead) - child.newHead(c.cascadedHead, false) - } - } else { - // If processing failed, don't retry until further notification - c.log.Debug("Chain index processing failed", "section", section, "err", err) - c.verifyLastHead() - c.knownSections = c.storedSections - } - } - // If there are still further sections to process, 
reschedule - if c.knownSections > c.storedSections { - time.AfterFunc(c.throttling, func() { - select { - case c.update <- struct{}{}: - default: - } - }) - } - c.lock.Unlock() - } - } -} - -// processSection processes an entire section by calling backend functions while -// ensuring the continuity of the passed headers. Since the chain mutex is not -// held while processing, the continuity can be broken by a long reorg, in which -// case the function returns with an error. -func (c *ChainIndexer) processSection(section uint64, lastHead common.Hash) (common.Hash, error) { - c.log.Trace("Processing new chain section", "section", section) - - // Reset and partial processing - if err := c.backend.Reset(c.ctx, section, lastHead); err != nil { - c.setValidSections(0) - return common.Hash{}, err - } - - for number := section * c.sectionSize; number < (section+1)*c.sectionSize; number++ { - hash := rawdb.ReadCanonicalHash(c.chainDb, number) - if hash == (common.Hash{}) { - return common.Hash{}, fmt.Errorf("canonical block #%d unknown", number) - } - header := rawdb.ReadHeader(c.chainDb, hash, number) - if header == nil { - return common.Hash{}, fmt.Errorf("block #%d [%x..] 
not found", number, hash[:4]) - } else if header.ParentHash != lastHead { - return common.Hash{}, errors.New("chain reorged during section processing") - } - if err := c.backend.Process(c.ctx, header); err != nil { - return common.Hash{}, err - } - lastHead = header.Hash() - } - if err := c.backend.Commit(); err != nil { - return common.Hash{}, err - } - return lastHead, nil -} - -// verifyLastHead compares last stored section head with the corresponding block hash in the -// actual canonical chain and rolls back reorged sections if necessary to ensure that stored -// sections are all valid -func (c *ChainIndexer) verifyLastHead() { - for c.storedSections > 0 && c.storedSections > c.checkpointSections { - if c.SectionHead(c.storedSections-1) == rawdb.ReadCanonicalHash(c.chainDb, c.storedSections*c.sectionSize-1) { - return - } - c.setValidSections(c.storedSections - 1) - } -} - -// Sections returns the number of processed sections maintained by the indexer -// and also the information about the last header indexed for potential canonical -// verifications. 
-func (c *ChainIndexer) Sections() (uint64, uint64, common.Hash) { - c.lock.Lock() - defer c.lock.Unlock() - - c.verifyLastHead() - return c.storedSections, c.storedSections*c.sectionSize - 1, c.SectionHead(c.storedSections - 1) -} - -// AddChildIndexer adds a child ChainIndexer that can use the output of this one -func (c *ChainIndexer) AddChildIndexer(indexer *ChainIndexer) { - if indexer == c { - panic("can't add indexer as a child of itself") - } - c.lock.Lock() - defer c.lock.Unlock() - - c.children = append(c.children, indexer) - - // Cascade any pending updates to new children too - sections := c.storedSections - if c.knownSections < sections { - // if a section is "stored" but not "known" then it is a checkpoint without - // available chain data so we should not cascade it yet - sections = c.knownSections - } - if sections > 0 { - indexer.newHead(sections*c.sectionSize-1, false) - } -} - -// Prune deletes all chain data older than given threshold. -func (c *ChainIndexer) Prune(threshold uint64) error { - return c.backend.Prune(threshold) -} - -// loadValidSections reads the number of valid sections from the index database -// and caches is into the local state. -func (c *ChainIndexer) loadValidSections() { - data, _ := c.indexDb.Get([]byte("count")) - if len(data) == 8 { - c.storedSections = binary.BigEndian.Uint64(data) - } -} - -// setValidSections writes the number of valid sections to the index database -func (c *ChainIndexer) setValidSections(sections uint64) { - // Set the current number of valid sections in the database - var data [8]byte - binary.BigEndian.PutUint64(data[:], sections) - c.indexDb.Put([]byte("count"), data[:]) - - // Remove any reorged sections, caching the valids in the mean time - for c.storedSections > sections { - c.storedSections-- - c.removeSectionHead(c.storedSections) - } - c.storedSections = sections // needed if new > old -} - -// SectionHead retrieves the last block hash of a processed section from the -// index database. 
-func (c *ChainIndexer) SectionHead(section uint64) common.Hash { - var data [8]byte - binary.BigEndian.PutUint64(data[:], section) - - hash, _ := c.indexDb.Get(append([]byte("shead"), data[:]...)) - if len(hash) == len(common.Hash{}) { - return common.BytesToHash(hash) - } - return common.Hash{} -} - -// setSectionHead writes the last block hash of a processed section to the index -// database. -func (c *ChainIndexer) setSectionHead(section uint64, hash common.Hash) { - var data [8]byte - binary.BigEndian.PutUint64(data[:], section) - - c.indexDb.Put(append([]byte("shead"), data[:]...), hash.Bytes()) -} - -// removeSectionHead removes the reference to a processed section from the index -// database. -func (c *ChainIndexer) removeSectionHead(section uint64) { - var data [8]byte - binary.BigEndian.PutUint64(data[:], section) - - c.indexDb.Delete(append([]byte("shead"), data[:]...)) -} diff --git a/core/chain_indexer_test.go b/core/chain_indexer_test.go deleted file mode 100644 index bf3bde756cb9..000000000000 --- a/core/chain_indexer_test.go +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright 2017 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . 
- -package core - -import ( - "context" - "errors" - "fmt" - "math/big" - "math/rand" - "testing" - "time" - - "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/core/rawdb" - "github.com/ethereum/go-ethereum/core/types" -) - -// Runs multiple tests with randomized parameters. -func TestChainIndexerSingle(t *testing.T) { - for i := 0; i < 10; i++ { - testChainIndexer(t, 1) - } -} - -// Runs multiple tests with randomized parameters and different number of -// chain backends. -func TestChainIndexerWithChildren(t *testing.T) { - for i := 2; i < 8; i++ { - testChainIndexer(t, i) - } -} - -// testChainIndexer runs a test with either a single chain indexer or a chain of -// multiple backends. The section size and required confirmation count parameters -// are randomized. -func testChainIndexer(t *testing.T, count int) { - db := rawdb.NewMemoryDatabase() - defer db.Close() - - // Create a chain of indexers and ensure they all report empty - backends := make([]*testChainIndexBackend, count) - for i := 0; i < count; i++ { - var ( - sectionSize = uint64(rand.Intn(100) + 1) - confirmsReq = uint64(rand.Intn(10)) - ) - backends[i] = &testChainIndexBackend{t: t, processCh: make(chan uint64)} - backends[i].indexer = NewChainIndexer(db, rawdb.NewTable(db, string([]byte{byte(i)})), backends[i], sectionSize, confirmsReq, 0, fmt.Sprintf("indexer-%d", i)) - - if sections, _, _ := backends[i].indexer.Sections(); sections != 0 { - t.Fatalf("Canonical section count mismatch: have %v, want %v", sections, 0) - } - if i > 0 { - backends[i-1].indexer.AddChildIndexer(backends[i].indexer) - } - } - defer backends[0].indexer.Close() // parent indexer shuts down children - // notify pings the root indexer about a new head or reorg, then expect - // processed blocks if a section is processable - notify := func(headNum, failNum uint64, reorg bool) { - backends[0].indexer.newHead(headNum, reorg) - if reorg { - for _, backend := range backends { - headNum = 
backend.reorg(headNum) - backend.assertSections() - } - return - } - var cascade bool - for _, backend := range backends { - headNum, cascade = backend.assertBlocks(headNum, failNum) - if !cascade { - break - } - backend.assertSections() - } - } - // inject inserts a new random canonical header into the database directly - inject := func(number uint64) { - header := &types.Header{Number: big.NewInt(int64(number)), Extra: big.NewInt(rand.Int63()).Bytes()} - if number > 0 { - header.ParentHash = rawdb.ReadCanonicalHash(db, number-1) - } - rawdb.WriteHeader(db, header) - rawdb.WriteCanonicalHash(db, header.Hash(), number) - } - // Start indexer with an already existing chain - for i := uint64(0); i <= 100; i++ { - inject(i) - } - notify(100, 100, false) - - // Add new blocks one by one - for i := uint64(101); i <= 1000; i++ { - inject(i) - notify(i, i, false) - } - // Do a reorg - notify(500, 500, true) - - // Create new fork - for i := uint64(501); i <= 1000; i++ { - inject(i) - notify(i, i, false) - } - for i := uint64(1001); i <= 1500; i++ { - inject(i) - } - // Failed processing scenario where less blocks are available than notified - notify(2000, 1500, false) - - // Notify about a reorg (which could have caused the missing blocks if happened during processing) - notify(1500, 1500, true) - - // Create new fork - for i := uint64(1501); i <= 2000; i++ { - inject(i) - notify(i, i, false) - } -} - -// testChainIndexBackend implements ChainIndexerBackend -type testChainIndexBackend struct { - t *testing.T - indexer *ChainIndexer - section, headerCnt, stored uint64 - processCh chan uint64 -} - -// assertSections verifies if a chain indexer has the correct number of section. 
-func (b *testChainIndexBackend) assertSections() { - // Keep trying for 3 seconds if it does not match - var sections uint64 - for i := 0; i < 300; i++ { - sections, _, _ = b.indexer.Sections() - if sections == b.stored { - return - } - time.Sleep(10 * time.Millisecond) - } - b.t.Fatalf("Canonical section count mismatch: have %v, want %v", sections, b.stored) -} - -// assertBlocks expects processing calls after new blocks have arrived. If the -// failNum < headNum then we are simulating a scenario where a reorg has happened -// after the processing has started and the processing of a section fails. -func (b *testChainIndexBackend) assertBlocks(headNum, failNum uint64) (uint64, bool) { - var sections uint64 - if headNum >= b.indexer.confirmsReq { - sections = (headNum + 1 - b.indexer.confirmsReq) / b.indexer.sectionSize - if sections > b.stored { - // expect processed blocks - for expectd := b.stored * b.indexer.sectionSize; expectd < sections*b.indexer.sectionSize; expectd++ { - if expectd > failNum { - // rolled back after processing started, no more process calls expected - // wait until updating is done to make sure that processing actually fails - var updating bool - for i := 0; i < 300; i++ { - b.indexer.lock.Lock() - updating = b.indexer.knownSections > b.indexer.storedSections - b.indexer.lock.Unlock() - if !updating { - break - } - time.Sleep(10 * time.Millisecond) - } - if updating { - b.t.Fatalf("update did not finish") - } - sections = expectd / b.indexer.sectionSize - break - } - select { - case <-time.After(10 * time.Second): - b.t.Fatalf("Expected processed block #%d, got nothing", expectd) - case processed := <-b.processCh: - if processed != expectd { - b.t.Errorf("Expected processed block #%d, got #%d", expectd, processed) - } - } - } - b.stored = sections - } - } - if b.stored == 0 { - return 0, false - } - return b.stored*b.indexer.sectionSize - 1, true -} - -func (b *testChainIndexBackend) reorg(headNum uint64) uint64 { - firstChanged := 
(headNum + 1) / b.indexer.sectionSize - if firstChanged < b.stored { - b.stored = firstChanged - } - return b.stored * b.indexer.sectionSize -} - -func (b *testChainIndexBackend) Reset(ctx context.Context, section uint64, prevHead common.Hash) error { - b.section = section - b.headerCnt = 0 - return nil -} - -func (b *testChainIndexBackend) Process(ctx context.Context, header *types.Header) error { - b.headerCnt++ - if b.headerCnt > b.indexer.sectionSize { - b.t.Error("Processing too many headers") - } - //t.processCh <- header.Number.Uint64() - select { - case <-time.After(10 * time.Second): - b.t.Error("Unexpected call to Process") - // Can't use Fatal since this is not the test's goroutine. - // Returning error stops the chainIndexer's updateLoop - return errors.New("unexpected call to Process") - case b.processCh <- header.Number.Uint64(): - } - return nil -} - -func (b *testChainIndexBackend) Commit() error { - if b.headerCnt != b.indexer.sectionSize { - b.t.Error("Not enough headers processed") - } - return nil -} - -func (b *testChainIndexBackend) Prune(threshold uint64) error { - return nil -} diff --git a/core/rawdb/accessors_indexes.go b/core/rawdb/accessors_indexes.go index d1b0cf5053a6..68c3454e43fb 100644 --- a/core/rawdb/accessors_indexes.go +++ b/core/rawdb/accessors_indexes.go @@ -17,7 +17,6 @@ package rawdb import ( - "bytes" "encoding/binary" "errors" "math/big" @@ -147,41 +146,6 @@ func ReadReceipt(db ethdb.Reader, hash common.Hash, config *params.ChainConfig) return nil, common.Hash{}, 0, 0 } -// ReadBloomBits retrieves the compressed bloom bit vector belonging to the given -// section and bit index from the. -func ReadBloomBits(db ethdb.KeyValueReader, bit uint, section uint64, head common.Hash) ([]byte, error) { - return db.Get(bloomBitsKey(bit, section, head)) -} - -// WriteBloomBits stores the compressed bloom bits vector belonging to the given -// section and bit index. 
-func WriteBloomBits(db ethdb.KeyValueWriter, bit uint, section uint64, head common.Hash, bits []byte) { - if err := db.Put(bloomBitsKey(bit, section, head), bits); err != nil { - log.Crit("Failed to store bloom bits", "err", err) - } -} - -// DeleteBloombits removes all compressed bloom bits vector belonging to the -// given section range and bit index. -func DeleteBloombits(db ethdb.Database, bit uint, from uint64, to uint64) { - start, end := bloomBitsKey(bit, from, common.Hash{}), bloomBitsKey(bit, to, common.Hash{}) - it := db.NewIterator(nil, start) - defer it.Release() - - for it.Next() { - if bytes.Compare(it.Key(), end) >= 0 { - break - } - if len(it.Key()) != len(bloomBitsPrefix)+2+8+32 { - continue - } - db.Delete(it.Key()) - } - if it.Error() != nil { - log.Crit("Failed to delete bloom bits", "err", it.Error()) - } -} - var emptyRow = []uint32{} // ReadFilterMapRow retrieves a filter map row at the given mapRowIndex diff --git a/core/rawdb/accessors_indexes_test.go b/core/rawdb/accessors_indexes_test.go index 78dba000fcef..2ce4330f70bf 100644 --- a/core/rawdb/accessors_indexes_test.go +++ b/core/rawdb/accessors_indexes_test.go @@ -111,46 +111,3 @@ func TestLookupStorage(t *testing.T) { }) } } - -func TestDeleteBloomBits(t *testing.T) { - // Prepare testing data - db := NewMemoryDatabase() - for i := uint(0); i < 2; i++ { - for s := uint64(0); s < 2; s++ { - WriteBloomBits(db, i, s, params.MainnetGenesisHash, []byte{0x01, 0x02}) - WriteBloomBits(db, i, s, params.SepoliaGenesisHash, []byte{0x01, 0x02}) - } - } - check := func(bit uint, section uint64, head common.Hash, exist bool) { - bits, _ := ReadBloomBits(db, bit, section, head) - if exist && !bytes.Equal(bits, []byte{0x01, 0x02}) { - t.Fatalf("Bloombits mismatch") - } - if !exist && len(bits) > 0 { - t.Fatalf("Bloombits should be removed") - } - } - // Check the existence of written data. 
- check(0, 0, params.MainnetGenesisHash, true) - check(0, 0, params.SepoliaGenesisHash, true) - - // Check the existence of deleted data. - DeleteBloombits(db, 0, 0, 1) - check(0, 0, params.MainnetGenesisHash, false) - check(0, 0, params.SepoliaGenesisHash, false) - check(0, 1, params.MainnetGenesisHash, true) - check(0, 1, params.SepoliaGenesisHash, true) - - // Check the existence of deleted data. - DeleteBloombits(db, 0, 0, 2) - check(0, 0, params.MainnetGenesisHash, false) - check(0, 0, params.SepoliaGenesisHash, false) - check(0, 1, params.MainnetGenesisHash, false) - check(0, 1, params.SepoliaGenesisHash, false) - - // Bit1 shouldn't be affect. - check(1, 0, params.MainnetGenesisHash, true) - check(1, 0, params.SepoliaGenesisHash, true) - check(1, 1, params.MainnetGenesisHash, true) - check(1, 1, params.SepoliaGenesisHash, true) -} diff --git a/core/rawdb/database.go b/core/rawdb/database.go index 13233406fe6c..ab4dfd76b7a4 100644 --- a/core/rawdb/database.go +++ b/core/rawdb/database.go @@ -471,7 +471,6 @@ func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error { accountSnaps stat storageSnaps stat preimages stat - bloomBits stat beaconHeaders stat cliqueSnaps stat @@ -532,10 +531,6 @@ func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error { metadata.Add(size) case bytes.HasPrefix(key, genesisPrefix) && len(key) == (len(genesisPrefix)+common.HashLength): metadata.Add(size) - case bytes.HasPrefix(key, bloomBitsPrefix) && len(key) == (len(bloomBitsPrefix)+10+common.HashLength): - bloomBits.Add(size) - case bytes.HasPrefix(key, BloomBitsIndexPrefix): - bloomBits.Add(size) case bytes.HasPrefix(key, skeletonHeaderPrefix) && len(key) == (len(skeletonHeaderPrefix)+8): beaconHeaders.Add(size) case bytes.HasPrefix(key, CliqueSnapshotPrefix) && len(key) == 7+common.HashLength: @@ -600,7 +595,6 @@ func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error { {"Key-Value store", "Block number->hash", 
numHashPairings.Size(), numHashPairings.Count()}, {"Key-Value store", "Block hash->number", hashNumPairings.Size(), hashNumPairings.Count()}, {"Key-Value store", "Transaction index", txLookups.Size(), txLookups.Count()}, - {"Key-Value store", "Bloombit index", bloomBits.Size(), bloomBits.Count()}, {"Key-Value store", "Contract codes", codes.Size(), codes.Count()}, {"Key-Value store", "Hash trie nodes", legacyTries.Size(), legacyTries.Count()}, {"Key-Value store", "Path trie state lookups", stateLookups.Size(), stateLookups.Count()}, diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index 25c4d88a5463..60d6a58ade49 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -230,16 +230,6 @@ func storageSnapshotsKey(accountHash common.Hash) []byte { return append(SnapshotStoragePrefix, accountHash.Bytes()...) } -// bloomBitsKey = bloomBitsPrefix + bit (uint16 big endian) + section (uint64 big endian) + hash -func bloomBitsKey(bit uint, section uint64, hash common.Hash) []byte { - key := append(append(bloomBitsPrefix, make([]byte, 10)...), hash.Bytes()...) - - binary.BigEndian.PutUint16(key[1:], uint16(bit)) - binary.BigEndian.PutUint64(key[3:], section) - - return key -} - // skeletonHeaderKey = skeletonHeaderPrefix + num (uint64 big endian) func skeletonHeaderKey(number uint64) []byte { return append(skeletonHeaderPrefix, encodeBlockNumber(number)...) 
diff --git a/eth/api_backend.go b/eth/api_backend.go index ce5744017090..4fc7da83dac1 100644 --- a/eth/api_backend.go +++ b/eth/api_backend.go @@ -28,7 +28,6 @@ import ( "github.com/ethereum/go-ethereum/consensus" "github.com/ethereum/go-ethereum/consensus/misc/eip4844" "github.com/ethereum/go-ethereum/core" - "github.com/ethereum/go-ethereum/core/bloombits" "github.com/ethereum/go-ethereum/core/filtermaps" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state" @@ -406,17 +405,6 @@ func (b *EthAPIBackend) RPCTxFeeCap() float64 { return b.eth.config.RPCTxFeeCap } -func (b *EthAPIBackend) BloomStatus() (uint64, uint64) { - sections, _, _ := b.eth.bloomIndexer.Sections() - return params.BloomBitsBlocks, sections -} - -func (b *EthAPIBackend) ServiceFilter(ctx context.Context, session *bloombits.MatcherSession) { - for i := 0; i < bloomFilterThreads; i++ { - go session.Multiplex(bloomRetrievalBatch, bloomRetrievalWait, b.eth.bloomRequests) - } -} - func (b *EthAPIBackend) NewMatcherBackend() filtermaps.MatcherBackend { return b.eth.filterMaps.NewMatcherBackend() } diff --git a/eth/backend.go b/eth/backend.go index 902279e4ae55..f66ed1bbb35c 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -29,7 +29,6 @@ import ( "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethereum/go-ethereum/consensus" "github.com/ethereum/go-ethereum/core" - "github.com/ethereum/go-ethereum/core/bloombits" "github.com/ethereum/go-ethereum/core/filtermaps" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state/pruner" @@ -80,10 +79,6 @@ type Ethereum struct { engine consensus.Engine accountManager *accounts.Manager - bloomRequests chan chan *bloombits.Retrieval // Channel receiving bloom data retrieval requests - bloomIndexer *core.ChainIndexer // Bloom indexer operating during block imports - closeBloomHandler chan struct{} - filterMaps *filtermaps.FilterMaps APIBackend *EthAPIBackend @@ -152,19 +147,16 @@ func 
New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) { networkID = chainConfig.ChainID.Uint64() } eth := &Ethereum{ - config: config, - chainDb: chainDb, - eventMux: stack.EventMux(), - accountManager: stack.AccountManager(), - engine: engine, - closeBloomHandler: make(chan struct{}), - networkID: networkID, - gasPrice: config.Miner.GasPrice, - bloomRequests: make(chan chan *bloombits.Retrieval), - bloomIndexer: core.NewBloomIndexer(chainDb, params.BloomBitsBlocks, params.BloomConfirms), - p2pServer: stack.Server(), - discmix: enode.NewFairMix(0), - shutdownTracker: shutdowncheck.NewShutdownTracker(chainDb), + config: config, + chainDb: chainDb, + eventMux: stack.EventMux(), + accountManager: stack.AccountManager(), + engine: engine, + networkID: networkID, + gasPrice: config.Miner.GasPrice, + p2pServer: stack.Server(), + discmix: enode.NewFairMix(0), + shutdownTracker: shutdowncheck.NewShutdownTracker(chainDb), } bcVersion := rawdb.ReadDatabaseVersion(chainDb) var dbVer = "" @@ -222,7 +214,6 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) { if err != nil { return nil, err } - eth.bloomIndexer.Start(eth.blockchain) eth.filterMaps = filtermaps.NewFilterMaps(chainDb, eth.blockchain) if config.BlobPool.Datadir != "" { @@ -259,10 +250,10 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) { eth.miner.SetExtra(makeExtraData(config.Miner.ExtraData)) eth.APIBackend = &EthAPIBackend{ - extRPCEnabled: stack.Config().ExtRPCEnabled(), - allowUnprotectedTxs: stack.Config().AllowUnprotectedTxs, - eth: eth, - gpo: nil, + extRPCEnabled: stack.Config().ExtRPCEnabled(), + allowUnprotectedTxs: stack.Config().AllowUnprotectedTxs, + eth: eth, + gpo: nil, } if eth.APIBackend.allowUnprotectedTxs { log.Info("Unprotected transactions allowed") @@ -346,7 +337,6 @@ func (s *Ethereum) Downloader() *downloader.Downloader { return s.handler.downlo func (s *Ethereum) Synced() bool { return s.handler.synced.Load() } func (s 
*Ethereum) SetSynced() { s.handler.enableSyncedFeatures() } func (s *Ethereum) ArchiveMode() bool { return s.config.NoPruning } -func (s *Ethereum) BloomIndexer() *core.ChainIndexer { return s.bloomIndexer } // Protocols returns all the currently configured // network protocols to start. @@ -363,9 +353,6 @@ func (s *Ethereum) Protocols() []p2p.Protocol { func (s *Ethereum) Start() error { s.setupDiscovery() - // Start the bloom bits servicing goroutines - s.startBloomHandlers(params.BloomBitsBlocks) - // Regularly update shutdown marker s.shutdownTracker.Start() @@ -414,9 +401,7 @@ func (s *Ethereum) Stop() error { s.handler.Stop() // Then stop everything else. - s.bloomIndexer.Close() s.filterMaps.Close() - close(s.closeBloomHandler) s.txPool.Close() s.blockchain.Stop() s.engine.Close() diff --git a/eth/bloombits.go b/eth/bloombits.go deleted file mode 100644 index 0cb7050d2327..000000000000 --- a/eth/bloombits.go +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2017 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . 
- -package eth - -import ( - "time" - - "github.com/ethereum/go-ethereum/common/bitutil" - "github.com/ethereum/go-ethereum/core/rawdb" -) - -const ( - // bloomServiceThreads is the number of goroutines used globally by an Ethereum - // instance to service bloombits lookups for all running filters. - bloomServiceThreads = 16 - - // bloomFilterThreads is the number of goroutines used locally per filter to - // multiplex requests onto the global servicing goroutines. - bloomFilterThreads = 3 - - // bloomRetrievalBatch is the maximum number of bloom bit retrievals to service - // in a single batch. - bloomRetrievalBatch = 16 - - // bloomRetrievalWait is the maximum time to wait for enough bloom bit requests - // to accumulate request an entire batch (avoiding hysteresis). - bloomRetrievalWait = time.Duration(0) -) - -// startBloomHandlers starts a batch of goroutines to accept bloom bit database -// retrievals from possibly a range of filters and serving the data to satisfy. -func (eth *Ethereum) startBloomHandlers(sectionSize uint64) { - for i := 0; i < bloomServiceThreads; i++ { - go func() { - for { - select { - case <-eth.closeBloomHandler: - return - - case request := <-eth.bloomRequests: - task := <-request - task.Bitsets = make([][]byte, len(task.Sections)) - for i, section := range task.Sections { - head := rawdb.ReadCanonicalHash(eth.chainDb, (section+1)*sectionSize-1) - if compVector, err := rawdb.ReadBloomBits(eth.chainDb, task.Bit, section, head); err == nil { - if blob, err := bitutil.DecompressBytes(compVector, int(sectionSize/8)); err == nil { - task.Bitsets[i] = blob - } else { - task.Error = err - } - } else { - task.Error = err - } - } - request <- task - } - } - }() - } -} diff --git a/eth/filters/bench_test.go b/eth/filters/bench_test.go deleted file mode 100644 index 73b96b77af62..000000000000 --- a/eth/filters/bench_test.go +++ /dev/null @@ -1,189 +0,0 @@ -// Copyright 2017 The go-ethereum Authors -// This file is part of the go-ethereum library. 
-// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . - -package filters - -import ( - "context" - "fmt" - "testing" - "time" - - "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/common/bitutil" - "github.com/ethereum/go-ethereum/core/bloombits" - "github.com/ethereum/go-ethereum/core/rawdb" - "github.com/ethereum/go-ethereum/core/types" - "github.com/ethereum/go-ethereum/ethdb" - "github.com/ethereum/go-ethereum/node" -) - -func BenchmarkBloomBits512(b *testing.B) { - benchmarkBloomBits(b, 512) -} - -func BenchmarkBloomBits1k(b *testing.B) { - benchmarkBloomBits(b, 1024) -} - -func BenchmarkBloomBits2k(b *testing.B) { - benchmarkBloomBits(b, 2048) -} - -func BenchmarkBloomBits4k(b *testing.B) { - benchmarkBloomBits(b, 4096) -} - -func BenchmarkBloomBits8k(b *testing.B) { - benchmarkBloomBits(b, 8192) -} - -func BenchmarkBloomBits16k(b *testing.B) { - benchmarkBloomBits(b, 16384) -} - -func BenchmarkBloomBits32k(b *testing.B) { - benchmarkBloomBits(b, 32768) -} - -const benchFilterCnt = 2000 - -func benchmarkBloomBits(b *testing.B, sectionSize uint64) { - b.Skip("test disabled: this tests presume (and modify) an existing datadir.") - benchDataDir := node.DefaultDataDir() + "/geth/chaindata" - b.Log("Running bloombits benchmark section size:", sectionSize) - - db, err := rawdb.NewLevelDBDatabase(benchDataDir, 128, 1024, 
"", false) - if err != nil { - b.Fatalf("error opening database at %v: %v", benchDataDir, err) - } - head := rawdb.ReadHeadBlockHash(db) - if head == (common.Hash{}) { - b.Fatalf("chain data not found at %v", benchDataDir) - } - - clearBloomBits(db) - b.Log("Generating bloombits data...") - headNum := rawdb.ReadHeaderNumber(db, head) - if headNum == nil || *headNum < sectionSize+512 { - b.Fatalf("not enough blocks for running a benchmark") - } - - start := time.Now() - cnt := (*headNum - 512) / sectionSize - var dataSize, compSize uint64 - for sectionIdx := uint64(0); sectionIdx < cnt; sectionIdx++ { - bc, err := bloombits.NewGenerator(uint(sectionSize)) - if err != nil { - b.Fatalf("failed to create generator: %v", err) - } - var header *types.Header - for i := sectionIdx * sectionSize; i < (sectionIdx+1)*sectionSize; i++ { - hash := rawdb.ReadCanonicalHash(db, i) - if header = rawdb.ReadHeader(db, hash, i); header == nil { - b.Fatalf("Error creating bloomBits data") - return - } - bc.AddBloom(uint(i-sectionIdx*sectionSize), header.Bloom) - } - sectionHead := rawdb.ReadCanonicalHash(db, (sectionIdx+1)*sectionSize-1) - for i := 0; i < types.BloomBitLength; i++ { - data, err := bc.Bitset(uint(i)) - if err != nil { - b.Fatalf("failed to retrieve bitset: %v", err) - } - comp := bitutil.CompressBytes(data) - dataSize += uint64(len(data)) - compSize += uint64(len(comp)) - rawdb.WriteBloomBits(db, uint(i), sectionIdx, sectionHead, comp) - } - //if sectionIdx%50 == 0 { - // b.Log(" section", sectionIdx, "/", cnt) - //} - } - - d := time.Since(start) - b.Log("Finished generating bloombits data") - b.Log(" ", d, "total ", d/time.Duration(cnt*sectionSize), "per block") - b.Log(" data size:", dataSize, " compressed size:", compSize, " compression ratio:", float64(compSize)/float64(dataSize)) - - b.Log("Running filter benchmarks...") - start = time.Now() - - var ( - backend *testBackend - sys *FilterSystem - ) - for i := 0; i < benchFilterCnt; i++ { - if i%20 == 0 { - 
db.Close() - db, _ = rawdb.NewLevelDBDatabase(benchDataDir, 128, 1024, "", false) - backend = &testBackend{db: db, sections: cnt} - sys = NewFilterSystem(backend, Config{}) - } - var addr common.Address - addr[0] = byte(i) - addr[1] = byte(i / 256) - filter := sys.NewRangeFilter(0, int64(cnt*sectionSize-1), []common.Address{addr}, nil) - if _, err := filter.Logs(context.Background()); err != nil { - b.Error("filter.Logs error:", err) - } - } - - d = time.Since(start) - b.Log("Finished running filter benchmarks") - b.Log(" ", d, "total ", d/time.Duration(benchFilterCnt), "per address", d*time.Duration(1000000)/time.Duration(benchFilterCnt*cnt*sectionSize), "per million blocks") - db.Close() -} - -//nolint:unused -func clearBloomBits(db ethdb.Database) { - var bloomBitsPrefix = []byte("bloomBits-") - fmt.Println("Clearing bloombits data...") - it := db.NewIterator(bloomBitsPrefix, nil) - for it.Next() { - db.Delete(it.Key()) - } - it.Release() -} - -func BenchmarkNoBloomBits(b *testing.B) { - b.Skip("test disabled: this tests presume (and modify) an existing datadir.") - benchDataDir := node.DefaultDataDir() + "/geth/chaindata" - b.Log("Running benchmark without bloombits") - db, err := rawdb.NewLevelDBDatabase(benchDataDir, 128, 1024, "", false) - if err != nil { - b.Fatalf("error opening database at %v: %v", benchDataDir, err) - } - head := rawdb.ReadHeadBlockHash(db) - if head == (common.Hash{}) { - b.Fatalf("chain data not found at %v", benchDataDir) - } - headNum := rawdb.ReadHeaderNumber(db, head) - - clearBloomBits(db) - - _, sys := newTestFilterSystem(b, db, Config{}) - - b.Log("Running filter benchmarks...") - start := time.Now() - filter := sys.NewRangeFilter(0, int64(*headNum), []common.Address{{}}, nil) - filter.Logs(context.Background()) - d := time.Since(start) - b.Log("Finished running filter benchmarks") - b.Log(" ", d, "total ", d*time.Duration(1000000)/time.Duration(*headNum+1), "per million blocks") - db.Close() -} diff --git 
a/eth/filters/filter.go b/eth/filters/filter.go index 7b0be8d0244f..2fcf0945ba95 100644 --- a/eth/filters/filter.go +++ b/eth/filters/filter.go @@ -25,7 +25,6 @@ import ( "time" "github.com/ethereum/go-ethereum/common" - "github.com/ethereum/go-ethereum/core/bloombits" "github.com/ethereum/go-ethereum/core/filtermaps" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/log" @@ -42,37 +41,13 @@ type Filter struct { block *common.Hash // Block hash if filtering a single block begin, end int64 // Range interval if filtering multiple blocks bbMatchCount uint64 - - matcher *bloombits.Matcher } // NewRangeFilter creates a new filter which uses a bloom filter on blocks to // figure out whether a particular block is interesting or not. func (sys *FilterSystem) NewRangeFilter(begin, end int64, addresses []common.Address, topics [][]common.Hash) *Filter { - // Flatten the address and topic filter clauses into a single bloombits filter - // system. Since the bloombits are not positional, nil topics are permitted, - // which get flattened into a nil byte slice. 
- var filters [][][]byte - if len(addresses) > 0 { - filter := make([][]byte, len(addresses)) - for i, address := range addresses { - filter[i] = address.Bytes() - } - filters = append(filters, filter) - } - for _, topicList := range topics { - filter := make([][]byte, len(topicList)) - for i, topic := range topicList { - filter[i] = topic.Bytes() - } - filters = append(filters, filter) - } - size, _ := sys.backend.BloomStatus() - // Create a generic filter and convert it into a range filter filter := newFilter(sys, addresses, topics) - - filter.matcher = bloombits.NewMatcher(size, filters) filter.begin = begin filter.end = end @@ -197,23 +172,7 @@ func (f *Filter) rangeLogsAsync(ctx context.Context) (chan *types.Log, chan erro close(logChan) }() - // Gather all indexed logs, and finish with non indexed ones - var ( - end = uint64(f.end) - size, sections = f.sys.backend.BloomStatus() - err error - ) - if indexed := sections * size; indexed > uint64(f.begin) { - if indexed > end { - indexed = end + 1 - } - if err = f.indexedLogs(ctx, indexed-1, logChan); err != nil { - errChan <- err - return - } - } - - if err := f.unindexedLogs(ctx, end, logChan); err != nil { + if err := f.unindexedLogs(ctx, uint64(f.end), logChan); err != nil { errChan <- err return } @@ -224,53 +183,6 @@ func (f *Filter) rangeLogsAsync(ctx context.Context) (chan *types.Log, chan erro return logChan, errChan } -// indexedLogs returns the logs matching the filter criteria based on the bloom -// bits indexed available locally or via the network. 
-func (f *Filter) indexedLogs(ctx context.Context, end uint64, logChan chan *types.Log) error { - // Create a matcher session and request servicing from the backend - matches := make(chan uint64, 64) - - session, err := f.matcher.Start(ctx, uint64(f.begin), end, matches) - if err != nil { - return err - } - defer session.Close() - - f.sys.backend.ServiceFilter(ctx, session) - - for { - select { - case number, ok := <-matches: - f.bbMatchCount++ - // Abort if all matches have been fulfilled - if !ok { - err := session.Error() - if err == nil { - f.begin = int64(end) + 1 - } - return err - } - f.begin = int64(number) + 1 - - // Retrieve the suggested block and pull any truly matching logs - header, err := f.sys.backend.HeaderByNumber(ctx, rpc.BlockNumber(number)) - if header == nil || err != nil { - return err - } - found, err := f.checkMatches(ctx, header) - if err != nil { - return err - } - for _, log := range found { - logChan <- log - } - - case <-ctx.Done(): - return ctx.Err() - } - } -} - // unindexedLogs returns the logs matching the filter criteria based on raw block // iteration and bloom matching. 
func (f *Filter) unindexedLogs(ctx context.Context, end uint64, logChan chan *types.Log) error { diff --git a/eth/filters/filter_system.go b/eth/filters/filter_system.go index 45f03f16d62d..41a1b4d1dcc5 100644 --- a/eth/filters/filter_system.go +++ b/eth/filters/filter_system.go @@ -29,7 +29,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/lru" "github.com/ethereum/go-ethereum/core" - "github.com/ethereum/go-ethereum/core/bloombits" "github.com/ethereum/go-ethereum/core/filtermaps" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" @@ -70,9 +69,6 @@ type Backend interface { SubscribeRemovedLogsEvent(ch chan<- core.RemovedLogsEvent) event.Subscription SubscribeLogsEvent(ch chan<- []*types.Log) event.Subscription - BloomStatus() (uint64, uint64) - ServiceFilter(ctx context.Context, session *bloombits.MatcherSession) - NewMatcherBackend() filtermaps.MatcherBackend } diff --git a/eth/filters/filter_system_test.go b/eth/filters/filter_system_test.go index 1d52afb28243..49b61bd585a2 100644 --- a/eth/filters/filter_system_test.go +++ b/eth/filters/filter_system_test.go @@ -29,7 +29,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/consensus/ethash" "github.com/ethereum/go-ethereum/core" - "github.com/ethereum/go-ethereum/core/bloombits" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" @@ -137,37 +136,6 @@ func (b *testBackend) SubscribeChainEvent(ch chan<- core.ChainEvent) event.Subsc return b.chainFeed.Subscribe(ch) } -func (b *testBackend) BloomStatus() (uint64, uint64) { - return params.BloomBitsBlocks, b.sections -} - -func (b *testBackend) ServiceFilter(ctx context.Context, session *bloombits.MatcherSession) { - requests := make(chan chan *bloombits.Retrieval) - - go session.Multiplex(16, 0, requests) - go func() { - for { - // Wait for a service request or a shutdown - 
select { - case <-ctx.Done(): - return - - case request := <-requests: - task := <-request - - task.Bitsets = make([][]byte, len(task.Sections)) - for i, section := range task.Sections { - if rand.Int()%4 != 0 { // Handle occasional missing deliveries - head := rawdb.ReadCanonicalHash(b.db, (section+1)*params.BloomBitsBlocks-1) - task.Bitsets[i], _ = rawdb.ReadBloomBits(b.db, task.Bit, section, head) - } - } - request <- task - } - } - }() -} - func (b *testBackend) setPending(block *types.Block, receipts types.Receipts) { b.pendingBlock = block b.pendingReceipts = receipts diff --git a/internal/ethapi/api_test.go b/internal/ethapi/api_test.go index 384ca9f1cc73..4a36cbdf2d0d 100644 --- a/internal/ethapi/api_test.go +++ b/internal/ethapi/api_test.go @@ -43,7 +43,6 @@ import ( "github.com/ethereum/go-ethereum/consensus/beacon" "github.com/ethereum/go-ethereum/consensus/ethash" "github.com/ethereum/go-ethereum/core" - "github.com/ethereum/go-ethereum/core/bloombits" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state" "github.com/ethereum/go-ethereum/core/types" @@ -623,11 +622,6 @@ func (b testBackend) SubscribeRemovedLogsEvent(ch chan<- core.RemovedLogsEvent) func (b testBackend) SubscribeLogsEvent(ch chan<- []*types.Log) event.Subscription { panic("implement me") } -func (b testBackend) BloomStatus() (uint64, uint64) { panic("implement me") } -func (b testBackend) ServiceFilter(ctx context.Context, session *bloombits.MatcherSession) { - panic("implement me") -} - func TestEstimateGas(t *testing.T) { t.Parallel() // Initialize test accounts diff --git a/internal/ethapi/backend.go b/internal/ethapi/backend.go index 96bf894b0a74..932644a80edf 100644 --- a/internal/ethapi/backend.go +++ b/internal/ethapi/backend.go @@ -27,7 +27,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/consensus" "github.com/ethereum/go-ethereum/core" - "github.com/ethereum/go-ethereum/core/bloombits" 
"github.com/ethereum/go-ethereum/core/filtermaps" "github.com/ethereum/go-ethereum/core/state" "github.com/ethereum/go-ethereum/core/types" @@ -96,8 +95,6 @@ type Backend interface { GetLogs(ctx context.Context, blockHash common.Hash, number uint64) ([][]*types.Log, error) SubscribeRemovedLogsEvent(ch chan<- core.RemovedLogsEvent) event.Subscription SubscribeLogsEvent(ch chan<- []*types.Log) event.Subscription - BloomStatus() (uint64, uint64) - ServiceFilter(ctx context.Context, session *bloombits.MatcherSession) NewMatcherBackend() filtermaps.MatcherBackend } diff --git a/internal/ethapi/transaction_args_test.go b/internal/ethapi/transaction_args_test.go index 531782817328..500bb8738c4c 100644 --- a/internal/ethapi/transaction_args_test.go +++ b/internal/ethapi/transaction_args_test.go @@ -30,7 +30,6 @@ import ( "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethereum/go-ethereum/consensus" "github.com/ethereum/go-ethereum/core" - "github.com/ethereum/go-ethereum/core/bloombits" "github.com/ethereum/go-ethereum/core/state" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/core/vm" @@ -396,10 +395,8 @@ func (b *backendMock) TxPoolContent() (map[common.Address][]*types.Transaction, func (b *backendMock) TxPoolContentFrom(addr common.Address) ([]*types.Transaction, []*types.Transaction) { return nil, nil } -func (b *backendMock) SubscribeNewTxsEvent(chan<- core.NewTxsEvent) event.Subscription { return nil } -func (b *backendMock) BloomStatus() (uint64, uint64) { return 0, 0 } -func (b *backendMock) ServiceFilter(ctx context.Context, session *bloombits.MatcherSession) {} -func (b *backendMock) SubscribeLogsEvent(ch chan<- []*types.Log) event.Subscription { return nil } +func (b *backendMock) SubscribeNewTxsEvent(chan<- core.NewTxsEvent) event.Subscription { return nil } +func (b *backendMock) SubscribeLogsEvent(ch chan<- []*types.Log) event.Subscription { return nil } func (b *backendMock) SubscribeRemovedLogsEvent(ch chan<- 
core.RemovedLogsEvent) event.Subscription { return nil } diff --git a/params/network_params.go b/params/network_params.go index 61bd6b2f4229..c016e7fcf32c 100644 --- a/params/network_params.go +++ b/params/network_params.go @@ -20,14 +20,6 @@ package params // aren't necessarily consensus related. const ( - // BloomBitsBlocks is the number of blocks a single bloom bit section vector - // contains on the server side. - BloomBitsBlocks uint64 = 4096 - - // BloomConfirms is the number of confirmation blocks before a bloom section is - // considered probably final and its rotated bits are calculated. - BloomConfirms = 256 - // FullImmutabilityThreshold is the number of blocks after which a chain segment is // considered immutable (i.e. soft finality). It is used by the downloader as a // hard limit against deep ancestors, by the blockchain against deep reorgs, by From 348c6f0fca59ab4a5e741fb64c99ee3ab5e548b8 Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Fri, 27 Sep 2024 12:38:07 +0200 Subject: [PATCH 07/23] core/filtermaps: remove bloombits database --- core/filtermaps/filtermaps.go | 73 ++++++++++++++++++++++------------- core/filtermaps/indexer.go | 52 +++++++++++++++++-------- core/rawdb/schema.go | 2 +- 3 files changed, 83 insertions(+), 44 deletions(-) diff --git a/core/filtermaps/filtermaps.go b/core/filtermaps/filtermaps.go index 8f5e436d9f2b..3b19cc2cd258 100644 --- a/core/filtermaps/filtermaps.go +++ b/core/filtermaps/filtermaps.go @@ -6,6 +6,7 @@ import ( "errors" "sort" "sync" + "time" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/lru" @@ -45,7 +46,8 @@ type blockchain interface { type FilterMaps struct { lock sync.RWMutex db ethdb.Database - closeCh chan chan struct{} + closeCh chan struct{} + closeWg sync.WaitGroup filterMapsRange chain blockchain matcherSyncCh chan *FilterMapsMatcherBackend @@ -102,7 +104,7 @@ func NewFilterMaps(db ethdb.Database, chain blockchain) *FilterMaps { fm := &FilterMaps{ db: db, chain: 
chain, - closeCh: make(chan chan struct{}), + closeCh: make(chan struct{}), filterMapsRange: filterMapsRange{ initialized: rs.Initialized, headLvPointer: rs.HeadLvPointer, @@ -119,64 +121,81 @@ func NewFilterMaps(db ethdb.Database, chain blockchain) *FilterMaps { lvPointerCache: lru.NewCache[uint64, uint64](1000), revertPoints: make(map[uint64]*revertPoint), } - if !fm.initialized { - fm.resetDb() - } - fm.updateMapCache() - if rp, err := fm.newUpdateBatch().makeRevertPoint(); err == nil { - fm.revertPoints[rp.blockNumber] = rp - } else { - log.Error("Error creating head revert point", "error", err) - } + fm.closeWg.Add(2) + go fm.removeBloomBits() go fm.updateLoop() return fm } // Close ensures that the indexer is fully stopped before returning. func (f *FilterMaps) Close() { - ch := make(chan struct{}) - f.closeCh <- ch - <-ch + close(f.closeCh) + f.closeWg.Wait() } // reset un-initializes the FilterMaps structure and removes all related data from -// the database. -// Note that this function assumes that the read/write lock is being held. -func (f *FilterMaps) reset() { - // deleting the range first ensures that resetDb will be called again at next - // startup and any leftover data will be removed even if it cannot finish now. - rawdb.DeleteFilterMapsRange(f.db) - f.resetDb() +// the database. The function returns true if everything was successfully removed. +func (f *FilterMaps) reset() bool { + f.lock.Lock() f.filterMapsRange = filterMapsRange{} f.filterMapCache = make(map[uint32]*filterMap) f.revertPoints = make(map[uint64]*revertPoint) f.blockPtrCache.Purge() f.lvPointerCache.Purge() + f.lock.Unlock() + // deleting the range first ensures that resetDb will be called again at next + // startup and any leftover data will be removed even if it cannot finish now. + rawdb.DeleteFilterMapsRange(f.db) + return f.removeDbWithPrefix(rawdb.FilterMapsPrefix, "Resetting log index database") } -// resetDb removes all log index data from the database. 
-func (f *FilterMaps) resetDb() { - var logged bool +// removeBloomBits removes old bloom bits data from the database. +func (f *FilterMaps) removeBloomBits() { + f.removeDbWithPrefix(rawdb.BloomBitsPrefix, "Removing old bloom bits database") + f.removeDbWithPrefix(rawdb.BloomBitsIndexPrefix, "Removing old bloom bits chain index") + f.closeWg.Done() +} + +// removeDbWithPrefix removes data with the given prefix from the database and +// returns true if everything was successfully removed. +func (f *FilterMaps) removeDbWithPrefix(prefix []byte, action string) bool { + var ( + logged bool + lastLogged time.Time + removed uint64 + ) for { - it := f.db.NewIterator(rawdb.FilterMapsPrefix, nil) + select { + case <-f.closeCh: + return false + default: + } + it := f.db.NewIterator(prefix, nil) batch := f.db.NewBatch() var count int for ; count < 10000 && it.Next(); count++ { batch.Delete(it.Key()) + removed++ } it.Release() if count == 0 { break } if !logged { - log.Info("Resetting log index database...") + log.Info(action + "...") logged = true + lastLogged = time.Now() + } + if time.Since(lastLogged) >= time.Second*10 { + log.Info(action+" in progress", "removed keys", removed) + lastLogged = time.Now() } batch.Write() } if logged { - log.Info("Resetting log index database finished") + log.Info(action + " finished") } + return true } // setRange updates the covered range and also adds the changes to the given batch. diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go index a9b1b56136fd..9c2235a03fa1 100644 --- a/core/filtermaps/indexer.go +++ b/core/filtermaps/indexer.go @@ -22,6 +22,14 @@ const ( // updateLoop initializes and updates the log index structure according to the // canonical chain. 
func (f *FilterMaps) updateLoop() { + defer f.closeWg.Done() + f.updateMapCache() + if rp, err := f.newUpdateBatch().makeRevertPoint(); err == nil { + f.revertPoints[rp.blockNumber] = rp + } else { + log.Error("Error creating head revert point", "error", err) + } + var ( headEventCh = make(chan core.ChainHeadEvent) sub = f.chain.SubscribeChainHeadEvent(headEventCh) @@ -54,8 +62,7 @@ func (f *FilterMaps) updateLoop() { case <-time.After(time.Second * 20): // keep updating log index during syncing head = f.chain.CurrentBlock() - case ch := <-f.closeCh: - close(ch) + case <-f.closeCh: stop = true } } @@ -69,7 +76,10 @@ func (f *FilterMaps) updateLoop() { for !stop { if !fmr.initialized { - f.tryInit(head) + if !f.tryInit(head) { + return + } + if syncMatcher != nil { syncMatcher.synced(head) syncMatcher = nil @@ -82,7 +92,9 @@ func (f *FilterMaps) updateLoop() { } // log index is initialized if fmr.headBlockHash != head.Hash() { - f.tryUpdateHead(head) + if !f.tryUpdateHead(head) { + return + } fmr = f.getRange() if fmr.headBlockHash != head.Hash() { wait() @@ -101,8 +113,7 @@ func (f *FilterMaps) updateLoop() { head = ev.Block.Header() case syncMatcher = <-f.matcherSyncCh: head = f.chain.CurrentBlock() - case ch := <-f.closeCh: - close(ch) + case <-f.closeCh: stop = true return true default: @@ -128,24 +139,34 @@ func (f *FilterMaps) getRange() filterMapsRange { } // tryInit attempts to initialize the log index structure. -func (f *FilterMaps) tryInit(head *types.Header) { +// Returns false if indexer was stopped during a database reset. In this case the +// indexer should exit and remaining parts of the old database will be removed +// at next startup. 
+func (f *FilterMaps) tryInit(head *types.Header) bool { + if !f.reset() { + return false + } receipts := rawdb.ReadRawReceipts(f.db, head.Hash(), head.Number.Uint64()) if receipts == nil { log.Error("Could not retrieve block receipts for init block", "number", head.Number, "hash", head.Hash()) - return + return true } update := f.newUpdateBatch() if err := update.initWithBlock(head, receipts); err != nil { log.Error("Could not initialize log index", "error", err) } f.applyUpdateBatch(update) + return true } // tryUpdateHead attempts to update the log index with a new head. If necessary, // it reverts to a common ancestor with the old head before adding new block logs. // If no suitable revert point is available (probably a reorg just after init) // then it resets the index and tries to re-initialize with the new head. -func (f *FilterMaps) tryUpdateHead(newHead *types.Header) { +// Returns false if indexer was stopped during a database reset. In this case the +// indexer should exit and remaining parts of the old database will be removed +// at next startup. 
+func (f *FilterMaps) tryUpdateHead(newHead *types.Header) bool { // iterate back from new head until the log index head or a revert point and // collect headers of blocks to be added var ( @@ -159,14 +180,12 @@ func (f *FilterMaps) tryUpdateHead(newHead *types.Header) { rp, err = f.getRevertPoint(chainPtr.Number.Uint64()) if err != nil { log.Error("Error fetching revert point", "block number", chainPtr.Number.Uint64(), "error", err) - return + return true } if rp == nil { // there are no more revert points available so we should reset and re-initialize log.Warn("No suitable revert point exists; re-initializing log index", "block number", newHead.Number.Uint64()) - f.reset() - f.tryInit(newHead) - return + return f.tryInit(newHead) } } if chainPtr.Hash() == rp.blockHash { @@ -178,7 +197,7 @@ func (f *FilterMaps) tryUpdateHead(newHead *types.Header) { chainPtr = f.chain.GetHeader(chainPtr.ParentHash, chainPtr.Number.Uint64()-1) if chainPtr == nil { log.Error("Canonical header not found", "number", chainPtr.Number.Uint64()-1, "hash", chainPtr.ParentHash) - return + return true } } if rp.blockHash != f.headBlockHash { @@ -187,12 +206,12 @@ func (f *FilterMaps) tryUpdateHead(newHead *types.Header) { } if err := f.revertTo(rp); err != nil { log.Error("Error applying revert point", "block number", chainPtr.Number.Uint64(), "error", err) - return + return true } } if newHeaders == nil { - return + return true } // add logs of new blocks in reverse order update := f.newUpdateBatch() @@ -214,6 +233,7 @@ func (f *FilterMaps) tryUpdateHead(newHead *types.Header) { } } f.applyUpdateBatch(update) + return true } // tryExtendTail attempts to extend the log index backwards until it indexes the diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index 60d6a58ade49..29e9adb42b14 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -106,7 +106,7 @@ var ( blockReceiptsPrefix = []byte("r") // blockReceiptsPrefix + num (uint64 big endian) + hash -> block receipts 
txLookupPrefix = []byte("l") // txLookupPrefix + hash -> transaction/receipt lookup metadata - bloomBitsPrefix = []byte("B") // bloomBitsPrefix + bit (uint16 big endian) + section (uint64 big endian) + hash -> bloom bits + BloomBitsPrefix = []byte("B") // bloomBitsPrefix + bit (uint16 big endian) + section (uint64 big endian) + hash -> bloom bits SnapshotAccountPrefix = []byte("a") // SnapshotAccountPrefix + account hash -> account trie value SnapshotStoragePrefix = []byte("o") // SnapshotStoragePrefix + account hash + storage hash -> storage trie value CodePrefix = []byte("c") // CodePrefix + code hash -> account code From bf2d00dd60dcb7ca785678ca7dba2d185ab58fc6 Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Fri, 27 Sep 2024 22:35:28 +0200 Subject: [PATCH 08/23] core/filtermaps: added history.logs parameter --- cmd/geth/chaincmd.go | 2 + cmd/geth/main.go | 2 + cmd/utils/flags.go | 17 ++++ core/filtermaps/filtermaps.go | 49 +++++++--- core/filtermaps/indexer.go | 168 ++++++++++++++++++++++++++++++---- core/rawdb/schema.go | 2 +- eth/backend.go | 2 +- eth/ethconfig/config.go | 3 + 8 files changed, 212 insertions(+), 33 deletions(-) diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index 018bca4a0f6f..b73826ed5523 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -101,6 +101,8 @@ if one is set. 
Otherwise it prints the genesis from the datadir.`, utils.VMTraceFlag, utils.VMTraceJsonConfigFlag, utils.TransactionHistoryFlag, + utils.LogHistoryFlag, + utils.LogNoHistoryFlag, utils.StateHistoryFlag, }, utils.DatabaseFlags), Description: ` diff --git a/cmd/geth/main.go b/cmd/geth/main.go index 2675a616759c..ef0d1ed2e470 100644 --- a/cmd/geth/main.go +++ b/cmd/geth/main.go @@ -88,6 +88,8 @@ var ( utils.SnapshotFlag, utils.TxLookupLimitFlag, // deprecated utils.TransactionHistoryFlag, + utils.LogHistoryFlag, + utils.LogNoHistoryFlag, utils.StateHistoryFlag, utils.LightServeFlag, // deprecated utils.LightIngressFlag, // deprecated diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index da2814855a66..808654600dca 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -279,6 +279,17 @@ var ( Value: ethconfig.Defaults.TransactionHistory, Category: flags.StateCategory, } + LogHistoryFlag = &cli.Uint64Flag{ + Name: "history.logs", + Usage: "Number of recent blocks to maintain log search index for (default = about one year, 0 = entire chain)", + Value: ethconfig.Defaults.LogHistory, + Category: flags.StateCategory, + } + LogNoHistoryFlag = &cli.BoolFlag{ + Name: "history.logs.disable", + Usage: "Do not maintain log search index", + Category: flags.StateCategory, + } // Beacon client light sync settings BeaconApiFlag = &cli.StringSliceFlag{ Name: "beacon.api", @@ -1727,6 +1738,12 @@ func SetEthConfig(ctx *cli.Context, stack *node.Node, cfg *ethconfig.Config) { cfg.StateScheme = rawdb.HashScheme log.Warn("Forcing hash state-scheme for archive mode") } + if ctx.IsSet(LogHistoryFlag.Name) { + cfg.LogHistory = ctx.Uint64(LogHistoryFlag.Name) + } + if ctx.IsSet(LogNoHistoryFlag.Name) { + cfg.LogNoHistory = true + } if ctx.IsSet(CacheFlag.Name) || ctx.IsSet(CacheTrieFlag.Name) { cfg.TrieCleanCache = ctx.Int(CacheFlag.Name) * ctx.Int(CacheTrieFlag.Name) / 100 } diff --git a/core/filtermaps/filtermaps.go b/core/filtermaps/filtermaps.go index 3b19cc2cd258..3ba2856e29af 
100644 --- a/core/filtermaps/filtermaps.go +++ b/core/filtermaps/filtermaps.go @@ -44,10 +44,13 @@ type blockchain interface { // without the tree hashing and consensus changes: // https://eips.ethereum.org/EIPS/eip-7745 type FilterMaps struct { - lock sync.RWMutex - db ethdb.Database - closeCh chan struct{} - closeWg sync.WaitGroup + lock sync.RWMutex + db ethdb.Database + closeCh chan struct{} + closeWg sync.WaitGroup + history uint64 + noHistory bool + filterMapsRange chain blockchain matcherSyncCh chan *FilterMapsMatcherBackend @@ -87,24 +90,32 @@ var emptyRow = FilterRow{} // filterMapsRange describes the block range that has been indexed and the log // value index range it has been mapped to. +// Note that tailBlockLvPointer points to the earliest log value index belonging +// to the tail block while tailLvPointer points to the earliest log value index +// added to the corresponding filter map. The latter might point to an earlier +// index after tail blocks have been pruned because we do not remove tail values +// one by one, rather delete entire maps when all blocks that had log values in +// those maps are unindexed. type filterMapsRange struct { - initialized bool - headLvPointer, tailLvPointer uint64 - headBlockNumber, tailBlockNumber uint64 - headBlockHash, tailParentHash common.Hash + initialized bool + headLvPointer, tailLvPointer, tailBlockLvPointer uint64 + headBlockNumber, tailBlockNumber uint64 + headBlockHash, tailParentHash common.Hash } // NewFilterMaps creates a new FilterMaps and starts the indexer in order to keep // the structure in sync with the given blockchain. 
-func NewFilterMaps(db ethdb.Database, chain blockchain) *FilterMaps { +func NewFilterMaps(db ethdb.Database, chain blockchain, history uint64, noHistory bool) *FilterMaps { rs, err := rawdb.ReadFilterMapsRange(db) if err != nil { log.Error("Error reading log index range", "error", err) } fm := &FilterMaps{ - db: db, - chain: chain, - closeCh: make(chan struct{}), + db: db, + chain: chain, + closeCh: make(chan struct{}), + history: history, + noHistory: noHistory, filterMapsRange: filterMapsRange{ initialized: rs.Initialized, headLvPointer: rs.HeadLvPointer, @@ -121,6 +132,11 @@ func NewFilterMaps(db ethdb.Database, chain blockchain) *FilterMaps { lvPointerCache: lru.NewCache[uint64, uint64](1000), revertPoints: make(map[uint64]*revertPoint), } + fm.tailBlockLvPointer, err = fm.getBlockLvPointer(fm.tailBlockNumber) + if err != nil { + log.Error("Error fetching tail block pointer, resetting log index", "error", err) + fm.filterMapsRange = filterMapsRange{} // updateLoop resets the database + } fm.closeWg.Add(2) go fm.removeBloomBits() go fm.updateLoop() @@ -200,7 +216,7 @@ func (f *FilterMaps) removeDbWithPrefix(prefix []byte, action string) bool { // setRange updates the covered range and also adds the changes to the given batch. // Note that this function assumes that the read/write lock is being held. 
-func (f *FilterMaps) setRange(batch ethdb.Batch, newRange filterMapsRange) { +func (f *FilterMaps) setRange(batch ethdb.KeyValueWriter, newRange filterMapsRange) { f.filterMapsRange = newRange rs := rawdb.FilterMapsRange{ Initialized: newRange.initialized, @@ -227,7 +243,7 @@ func (f *FilterMaps) updateMapCache() { defer f.filterMapLock.Unlock() newFilterMapCache := make(map[uint32]*filterMap) - firstMap, afterLastMap := uint32(f.tailLvPointer>>logValuesPerMap), uint32((f.headLvPointer+valuesPerMap-1)>>logValuesPerMap) + firstMap, afterLastMap := uint32(f.tailBlockLvPointer>>logValuesPerMap), uint32((f.headLvPointer+valuesPerMap-1)>>logValuesPerMap) headCacheFirst := firstMap + 1 if afterLastMap > headCacheFirst+headCacheSize { headCacheFirst = afterLastMap - headCacheSize @@ -255,7 +271,7 @@ func (f *FilterMaps) updateMapCache() { // If this is not the case then an invalid result or an error may be returned. // Note that this function assumes that the read lock is being held. func (f *FilterMaps) getLogByLvIndex(lvIndex uint64) (*types.Log, error) { - if lvIndex < f.tailLvPointer || lvIndex > f.headLvPointer { + if lvIndex < f.tailBlockLvPointer || lvIndex > f.headLvPointer { return nil, nil } // find possible block range based on map to block pointers @@ -264,6 +280,9 @@ func (f *FilterMaps) getLogByLvIndex(lvIndex uint64) (*types.Log, error) { if err != nil { return nil, err } + if firstBlockNumber < f.tailBlockNumber { + firstBlockNumber = f.tailBlockNumber + } var lastBlockNumber uint64 if mapIndex+1 < uint32((f.headLvPointer+valuesPerMap-1)>>logValuesPerMap) { lastBlockNumber, err = f.getMapBlockPtr(mapIndex + 1) diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go index 9c2235a03fa1..d29debe13553 100644 --- a/core/filtermaps/indexer.go +++ b/core/filtermaps/indexer.go @@ -23,6 +23,11 @@ const ( // canonical chain. 
func (f *FilterMaps) updateLoop() { defer f.closeWg.Done() + + if f.noHistory { + f.reset() + return + } f.updateMapCache() if rp, err := f.newUpdateBatch().makeRevertPoint(); err == nil { f.revertPoints[rp.blockNumber] = rp @@ -106,7 +111,7 @@ func (f *FilterMaps) updateLoop() { syncMatcher = nil } // log index head is at latest chain head; process tail blocks if possible - f.tryExtendTail(func() bool { + f.tryUpdateTail(head, func() bool { // return true if tail processing needs to be stopped select { case ev := <-headEventCh: @@ -236,19 +241,35 @@ func (f *FilterMaps) tryUpdateHead(newHead *types.Header) bool { return true } -// tryExtendTail attempts to extend the log index backwards until it indexes the -// genesis block or cannot find more block receipts. Since this is a long process, -// stopFn is called after adding each tail block and if it returns true, the +// tryUpdateTail attempts to extend or prune the log index according to the +// current head block number and the log history settings. +// stopFn is called regularly during the process, and if it returns true, the // latest batch is written and the function returns. -func (f *FilterMaps) tryExtendTail(stopFn func() bool) { +func (f *FilterMaps) tryUpdateTail(head *types.Header, stopFn func() bool) { + var tailTarget uint64 + if f.history > 0 { + if headNum := head.Number.Uint64(); headNum >= f.history { + tailTarget = headNum + 1 - f.history + } + } + tailNum := f.getRange().tailBlockNumber + if tailNum > tailTarget { + f.tryExtendTail(tailTarget, stopFn) + } + if tailNum < tailTarget { + f.pruneTailPtr(tailTarget) + f.tryPruneTailMaps(tailTarget, stopFn) + } +} + +// tryExtendTail attempts to extend the log index backwards until it indexes the +// tail target block or cannot find more block receipts. 
+func (f *FilterMaps) tryExtendTail(tailTarget uint64, stopFn func() bool) { fmr := f.getRange() number, parentHash := fmr.tailBlockNumber, fmr.tailParentHash - if number == 0 { - return - } update := f.newUpdateBatch() lastTailEpoch := update.tailEpoch() - for number > 0 && !stopFn() { + for number > tailTarget && !stopFn() { if tailEpoch := update.tailEpoch(); tailEpoch < lastTailEpoch { // limit the amount of data updated in a single batch f.applyUpdateBatch(update) @@ -274,6 +295,114 @@ func (f *FilterMaps) tryExtendTail(stopFn func() bool) { f.applyUpdateBatch(update) } +// pruneTailPtr updates the tail block number and hash and the corresponding +// tailBlockLvPointer according to the given tail target block number. +// Note that this function does not remove old index data, only marks it unused +// by updating the tail pointers, except for targetLvPointer which is unchanged +// as it marks the tail of the log index data stored in the database. +func (f *FilterMaps) pruneTailPtr(tailTarget uint64) { + f.lock.Lock() + defer f.lock.Unlock() + + // obtain target log value pointer + if tailTarget <= f.tailBlockNumber || tailTarget > f.headBlockNumber { + return // nothing to do + } + targetLvPointer, err := f.getBlockLvPointer(tailTarget) + fmr := f.filterMapsRange + + if err != nil { + log.Error("Error fetching tail target log value pointer", "block number", tailTarget, "error", err) + } + + // obtain tail target's parent hash + var tailParentHash common.Hash + if tailTarget > 0 { + if f.chain.GetCanonicalHash(fmr.headBlockNumber) != fmr.headBlockHash { + return // if a reorg is happening right now then try again later + } + tailParentHash = f.chain.GetCanonicalHash(tailTarget - 1) + if f.chain.GetCanonicalHash(fmr.headBlockNumber) != fmr.headBlockHash { + return // check again to make sure that tailParentHash is consistent with the indexed chain + } + } + + fmr.tailBlockNumber, fmr.tailParentHash = tailTarget, tailParentHash + fmr.tailBlockLvPointer = 
targetLvPointer + f.setRange(f.db, fmr) +} + +// tryPruneTailMaps removes unused filter maps and corresponding log index +// pointers from the database. This function also updates targetLvPointer. +func (f *FilterMaps) tryPruneTailMaps(tailTarget uint64, stopFn func() bool) { + fmr := f.getRange() + tailMap := uint32(fmr.tailLvPointer >> logValuesPerMap) + targetMap := uint32(fmr.tailBlockLvPointer >> logValuesPerMap) + if tailMap >= targetMap { + return + } + lastEpoch := (targetMap - 1) >> logMapsPerEpoch + removeLvPtr, err := f.getMapBlockPtr(tailMap) + if err != nil { + log.Error("Error fetching tail map block pointer", "map index", tailMap, "error", err) + removeLvPtr = math.MaxUint64 // do not remove anything + } + var ( + logged bool + lastLogged time.Time + ) + for tailMap < targetMap && !stopFn() { + tailEpoch := tailMap >> logMapsPerEpoch + if tailEpoch == lastEpoch { + f.pruneMaps(tailMap, targetMap, &removeLvPtr) + break + } + nextTailMap := (tailEpoch + 1) << logMapsPerEpoch + f.pruneMaps(tailMap, nextTailMap, &removeLvPtr) + tailMap = nextTailMap + if !logged || time.Since(lastLogged) >= time.Second*10 { + log.Info("Pruning log index tail...", "filter maps left", targetMap-tailMap) + logged, lastLogged = true, time.Now() + } + } + if logged { + log.Info("Finished pruning log index tail", "filter maps left", targetMap-tailMap) + } +} + +// pruneMaps removes filter maps and corresponding log index pointers in the +// specified range in a single batch. 
+func (f *FilterMaps) pruneMaps(first, afterLast uint32, removeLvPtr *uint64) { + nextBlockNumber, err := f.getMapBlockPtr(afterLast) + if err != nil { + log.Error("Error fetching next map block pointer", "map index", afterLast, "error", err) + nextBlockNumber = 0 // do not remove anything + } + batch := f.db.NewBatch() + for *removeLvPtr < nextBlockNumber { + f.deleteBlockLvPointer(batch, *removeLvPtr) + (*removeLvPtr)++ + } + for mapIndex := first; mapIndex < afterLast; mapIndex++ { + f.deleteMapBlockPtr(batch, mapIndex) + } + for rowIndex := uint32(0); rowIndex < mapHeight; rowIndex++ { + for mapIndex := first; mapIndex < afterLast; mapIndex++ { + f.storeFilterMapRow(batch, mapIndex, rowIndex, emptyRow) + } + } + fmr := f.getRange() + fmr.tailLvPointer = uint64(afterLast) << logValuesPerMap + if fmr.tailLvPointer > fmr.tailBlockLvPointer { + log.Error("Cannot prune filter maps beyond tail block log value pointer", "tailLvPointer", fmr.tailLvPointer, "tailBlockLvPointer", fmr.tailBlockLvPointer) + return + } + f.setRange(batch, fmr) + if err := batch.Write(); err != nil { + log.Crit("Could not write update batch", "error", err) + } +} + // updateBatch is a memory overlay collecting changes to the index log structure // that can be written to the database in a single batch while the in-memory // representations in FilterMaps are also updated. @@ -368,7 +497,7 @@ func (f *FilterMaps) applyUpdateBatch(u *updateBatch) { if err := batch.Write(); err != nil { log.Crit("Could not write update batch", "error", err) } - log.Info("Log index block range updated", "tail", u.tailBlockNumber, "head", u.headBlockNumber, "log values", u.headLvPointer-u.tailLvPointer) + log.Info("Log index block range updated", "tail", u.tailBlockNumber, "head", u.headBlockNumber, "log values", u.headLvPointer-u.tailBlockLvPointer) } // updatedRangeLength returns the lenght of the updated filter map range. 
@@ -378,7 +507,7 @@ func (u *updateBatch) updatedRangeLength() uint32 { // tailEpoch returns the tail epoch index. func (u *updateBatch) tailEpoch() uint32 { - return uint32(u.tailLvPointer >> (logValuesPerMap + logMapsPerEpoch)) + return uint32(u.tailBlockLvPointer >> (logValuesPerMap + logMapsPerEpoch)) } // getRowPtr returns a pointer to a FilterRow that can be modified. If the batch @@ -416,8 +545,8 @@ func (u *updateBatch) initWithBlock(header *types.Header, receipts types.Receipt return errors.New("already initialized") } u.initialized = true - u.headLvPointer, u.tailLvPointer = startLvPointer, startLvPointer - u.headBlockNumber, u.tailBlockNumber = header.Number.Uint64()-1, header.Number.Uint64() //TODO genesis? + u.headLvPointer, u.tailLvPointer, u.tailBlockLvPointer = startLvPointer, startLvPointer, startLvPointer + u.headBlockNumber, u.tailBlockNumber = header.Number.Uint64()-1, header.Number.Uint64() u.headBlockHash, u.tailParentHash = header.ParentHash, header.ParentHash u.addBlockToHead(header, receipts) return nil @@ -470,16 +599,23 @@ func (u *updateBatch) addBlockToHead(header *types.Header, receipts types.Receip // addValueToTail adds a single log value to the tail of the log index. 
func (u *updateBatch) addValueToTail(logValue common.Hash) error { - if u.tailLvPointer == 0 { + if u.tailBlockLvPointer == 0 { return errors.New("tail log value pointer underflow") } + if u.tailBlockLvPointer < u.tailLvPointer { + panic("tailBlockLvPointer < tailLvPointer") + } + u.tailBlockLvPointer-- + if u.tailBlockLvPointer >= u.tailLvPointer { + return nil // already added to the map + } u.tailLvPointer-- - mapIndex := uint32(u.tailLvPointer >> logValuesPerMap) + mapIndex := uint32(u.tailBlockLvPointer >> logValuesPerMap) rowPtr, err := u.getRowPtr(mapIndex, rowIndex(mapIndex>>logMapsPerEpoch, logValue)) if err != nil { return err } - column := columnIndex(u.tailLvPointer, logValue) + column := columnIndex(u.tailBlockLvPointer, logValue) *rowPtr = append(*rowPtr, 0) copy((*rowPtr)[1:], (*rowPtr)[:len(*rowPtr)-1]) (*rowPtr)[0] = column diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index 29e9adb42b14..0948fa9d9894 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -145,7 +145,7 @@ var ( FixedCommitteeRootKey = []byte("fixedRoot-") // bigEndian64(syncPeriod) -> committee root hash SyncCommitteeKey = []byte("committee-") // bigEndian64(syncPeriod) -> serialized committee - FilterMapsPrefix = []byte("fT5-") //TODO fm- + FilterMapsPrefix = []byte("fm-") filterMapsRangeKey = append(FilterMapsPrefix, byte('R')) filterMapRowPrefix = append(FilterMapsPrefix, byte('r')) // filterMapRowPrefix + mapRowIndex (uint64 big endian) -> filter row filterMapBlockPtrPrefix = append(FilterMapsPrefix, byte('b')) // filterMapBlockPtrPrefix + mapIndex (uint32 big endian) -> block number (uint64 big endian) diff --git a/eth/backend.go b/eth/backend.go index f66ed1bbb35c..db7dbfd034e3 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -214,7 +214,7 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) { if err != nil { return nil, err } - eth.filterMaps = filtermaps.NewFilterMaps(chainDb, eth.blockchain) + eth.filterMaps = 
filtermaps.NewFilterMaps(chainDb, eth.blockchain, config.LogHistory, config.LogNoHistory) if config.BlobPool.Datadir != "" { config.BlobPool.Datadir = stack.ResolvePath(config.BlobPool.Datadir) diff --git a/eth/ethconfig/config.go b/eth/ethconfig/config.go index c781a639408a..d347ad54d92a 100644 --- a/eth/ethconfig/config.go +++ b/eth/ethconfig/config.go @@ -52,6 +52,7 @@ var Defaults = Config{ NetworkId: 0, // enable auto configuration of networkID == chainID TxLookupLimit: 2350000, TransactionHistory: 2350000, + LogHistory: 2350000, StateHistory: params.FullImmutabilityThreshold, DatabaseCache: 512, TrieCleanCache: 154, @@ -94,6 +95,8 @@ type Config struct { TxLookupLimit uint64 `toml:",omitempty"` // The maximum number of blocks from head whose tx indices are reserved. TransactionHistory uint64 `toml:",omitempty"` // The maximum number of blocks from head whose tx indices are reserved. + LogHistory uint64 `toml:",omitempty"` // The maximum number of blocks from head where a log search index is maintained. + LogNoHistory bool `toml:",omitempty"` // No log search index is maintained. StateHistory uint64 `toml:",omitempty"` // The maximum number of blocks from head whose state histories are reserved. 
// State scheme represents the scheme used to store ethereum states and trie From 77318f1d57b7ea3358299997e74d79366d0dfba3 Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Sat, 28 Sep 2024 15:42:04 +0200 Subject: [PATCH 09/23] core/filtermaps: moved math stuff to separate file, added Params --- core/filtermaps/filtermaps.go | 200 ++---------------- core/filtermaps/indexer.go | 101 ++++----- core/filtermaps/matcher.go | 39 ++-- core/filtermaps/matcher_backend.go | 51 +++-- core/filtermaps/math.go | 180 ++++++++++++++++ .../{filtermaps_test.go => math_test.go} | 30 ++- eth/backend.go | 2 +- 7 files changed, 323 insertions(+), 280 deletions(-) create mode 100644 core/filtermaps/math.go rename core/filtermaps/{filtermaps_test.go => math_test.go} (78%) diff --git a/core/filtermaps/filtermaps.go b/core/filtermaps/filtermaps.go index 3ba2856e29af..600145fdcad5 100644 --- a/core/filtermaps/filtermaps.go +++ b/core/filtermaps/filtermaps.go @@ -1,10 +1,7 @@ package filtermaps import ( - "crypto/sha256" - "encoding/binary" "errors" - "sort" "sync" "time" @@ -18,16 +15,7 @@ import ( "github.com/ethereum/go-ethereum/log" ) -const ( - logMapHeight = 12 // log2(mapHeight) - mapHeight = 1 << logMapHeight // filter map height (number of rows) - logMapsPerEpoch = 6 // log2(mmapsPerEpochapsPerEpoch) - mapsPerEpoch = 1 << logMapsPerEpoch // number of maps in an epoch - logValuesPerMap = 16 // log2(logValuesPerMap) - valuesPerMap = 1 << logValuesPerMap // number of log values marked on each filter map - - headCacheSize = 8 // maximum number of recent filter maps cached in memory -) +const headCacheSize = 8 // maximum number of recent filter maps cached in memory // blockchain defines functions required by the FilterMaps log indexer. 
type blockchain interface { @@ -51,6 +39,7 @@ type FilterMaps struct { history uint64 noHistory bool + Params filterMapsRange chain blockchain matcherSyncCh chan *FilterMapsMatcherBackend @@ -60,7 +49,7 @@ type FilterMaps struct { // while updating the structure. Note that the set of cached maps depends // only on filterMapsRange and rows of other maps are not cached here. filterMapLock sync.Mutex - filterMapCache map[uint32]*filterMap + filterMapCache map[uint32]filterMap blockPtrCache *lru.Cache[uint32, uint64] lvPointerCache *lru.Cache[uint64, uint64] revertPoints map[uint64]*revertPoint @@ -73,7 +62,7 @@ type FilterMaps struct { // It can be used as a memory cache or an overlay while preparing a batch of // changes to the structure. In either case a nil value should be interpreted // as transparent (uncached/unchanged). -type filterMap [mapHeight]FilterRow +type filterMap []FilterRow // FilterRow encodes a single row of a filter map as a list of column indices. // Note that the values are always stored in the same order as they were added @@ -105,17 +94,19 @@ type filterMapsRange struct { // NewFilterMaps creates a new FilterMaps and starts the indexer in order to keep // the structure in sync with the given blockchain. 
-func NewFilterMaps(db ethdb.Database, chain blockchain, history uint64, noHistory bool) *FilterMaps { +func NewFilterMaps(db ethdb.Database, chain blockchain, params Params, history uint64, noHistory bool) *FilterMaps { rs, err := rawdb.ReadFilterMapsRange(db) if err != nil { log.Error("Error reading log index range", "error", err) } + params.deriveFields() fm := &FilterMaps{ db: db, chain: chain, closeCh: make(chan struct{}), history: history, noHistory: noHistory, + Params: params, filterMapsRange: filterMapsRange{ initialized: rs.Initialized, headLvPointer: rs.HeadLvPointer, @@ -127,7 +118,7 @@ func NewFilterMaps(db ethdb.Database, chain blockchain, history uint64, noHistor }, matcherSyncCh: make(chan *FilterMapsMatcherBackend), matchers: make(map[*FilterMapsMatcherBackend]struct{}), - filterMapCache: make(map[uint32]*filterMap), + filterMapCache: make(map[uint32]filterMap), blockPtrCache: lru.NewCache[uint32, uint64](1000), lvPointerCache: lru.NewCache[uint64, uint64](1000), revertPoints: make(map[uint64]*revertPoint), @@ -154,7 +145,7 @@ func (f *FilterMaps) Close() { func (f *FilterMaps) reset() bool { f.lock.Lock() f.filterMapsRange = filterMapsRange{} - f.filterMapCache = make(map[uint32]*filterMap) + f.filterMapCache = make(map[uint32]filterMap) f.revertPoints = make(map[uint64]*revertPoint) f.blockPtrCache.Purge() f.lvPointerCache.Purge() @@ -242,21 +233,21 @@ func (f *FilterMaps) updateMapCache() { f.filterMapLock.Lock() defer f.filterMapLock.Unlock() - newFilterMapCache := make(map[uint32]*filterMap) - firstMap, afterLastMap := uint32(f.tailBlockLvPointer>>logValuesPerMap), uint32((f.headLvPointer+valuesPerMap-1)>>logValuesPerMap) + newFilterMapCache := make(map[uint32]filterMap) + firstMap, afterLastMap := uint32(f.tailBlockLvPointer>>f.logValuesPerMap), uint32((f.headLvPointer+f.valuesPerMap-1)>>f.logValuesPerMap) headCacheFirst := firstMap + 1 if afterLastMap > headCacheFirst+headCacheSize { headCacheFirst = afterLastMap - headCacheSize } fm := 
f.filterMapCache[firstMap] if fm == nil { - fm = new(filterMap) + fm = make(filterMap, f.mapHeight) } newFilterMapCache[firstMap] = fm for mapIndex := headCacheFirst; mapIndex < afterLastMap; mapIndex++ { fm := f.filterMapCache[mapIndex] if fm == nil { - fm = new(filterMap) + fm = make(filterMap, f.mapHeight) } newFilterMapCache[mapIndex] = fm } @@ -275,7 +266,7 @@ func (f *FilterMaps) getLogByLvIndex(lvIndex uint64) (*types.Log, error) { return nil, nil } // find possible block range based on map to block pointers - mapIndex := uint32(lvIndex >> logValuesPerMap) + mapIndex := uint32(lvIndex >> f.logValuesPerMap) firstBlockNumber, err := f.getMapBlockPtr(mapIndex) if err != nil { return nil, err @@ -284,7 +275,7 @@ func (f *FilterMaps) getLogByLvIndex(lvIndex uint64) (*types.Log, error) { firstBlockNumber = f.tailBlockNumber } var lastBlockNumber uint64 - if mapIndex+1 < uint32((f.headLvPointer+valuesPerMap-1)>>logValuesPerMap) { + if mapIndex+1 < uint32((f.headLvPointer+f.valuesPerMap-1)>>f.logValuesPerMap) { lastBlockNumber, err = f.getMapBlockPtr(mapIndex + 1) if err != nil { return nil, err @@ -345,7 +336,7 @@ func (f *FilterMaps) getFilterMapRow(mapIndex, rowIndex uint32) (FilterRow, erro if fm != nil && fm[rowIndex] != nil { return fm[rowIndex], nil } - row, err := rawdb.ReadFilterMapRow(f.db, mapRowIndex(mapIndex, rowIndex)) + row, err := rawdb.ReadFilterMapRow(f.db, f.mapRowIndex(mapIndex, rowIndex)) if err != nil { return nil, err } @@ -364,9 +355,9 @@ func (f *FilterMaps) storeFilterMapRow(batch ethdb.Batch, mapIndex, rowIndex uin defer f.filterMapLock.Unlock() if fm := f.filterMapCache[mapIndex]; fm != nil { - (*fm)[rowIndex] = row + fm[rowIndex] = row } - rawdb.WriteFilterMapRow(batch, mapRowIndex(mapIndex, rowIndex), []uint32(row)) + rawdb.WriteFilterMapRow(batch, f.mapRowIndex(mapIndex, rowIndex), []uint32(row)) } // mapRowIndex calculates the unified storage index where the given row of the @@ -375,9 +366,9 @@ func (f *FilterMaps) 
storeFilterMapRow(batch ethdb.Batch, mapIndex, rowIndex uin // same data proximity reasons it is also suitable for database representation. // See also: // https://eips.ethereum.org/EIPS/eip-7745#hash-tree-structure -func mapRowIndex(mapIndex, rowIndex uint32) uint64 { - epochIndex, mapSubIndex := mapIndex>>logMapsPerEpoch, mapIndex%mapsPerEpoch - return (uint64(epochIndex)<>f.logMapsPerEpoch, mapIndex&(f.mapsPerEpoch-1) + return (uint64(epochIndex)< 1 { - q := a / m - m, a = a%m, m - x, y = y, x-q*y - } - if x < 0 { - x += m0 - } - return uint32(x) -} diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go index d29debe13553..6d94e10b45fb 100644 --- a/core/filtermaps/indexer.go +++ b/core/filtermaps/indexer.go @@ -13,10 +13,10 @@ import ( ) const ( - startLvPointer = valuesPerMap << 31 // log value index assigned to init block - removedPointer = math.MaxUint64 // used in updateBatch to signal removed items - revertPointFrequency = 256 // frequency of revert points in database - cachedRevertPoints = 64 // revert points for most recent blocks in memory + startLvMap = 1 << 31 // map index assigned to init block + removedPointer = math.MaxUint64 // used in updateBatch to signal removed items + revertPointFrequency = 256 // frequency of revert points in database + cachedRevertPoints = 64 // revert points for most recent blocks in memory ) // updateLoop initializes and updates the log index structure according to the @@ -36,7 +36,7 @@ func (f *FilterMaps) updateLoop() { } var ( - headEventCh = make(chan core.ChainHeadEvent) + headEventCh = make(chan core.ChainHeadEvent, 10) sub = f.chain.SubscribeChainHeadEvent(headEventCh) head *types.Header stop bool @@ -231,7 +231,7 @@ func (f *FilterMaps) tryUpdateHead(newHead *types.Header) bool { log.Error("Error adding new block", "number", newHeader.Number, "hash", newHeader.Hash(), "error", err) break } - if update.updatedRangeLength() >= mapsPerEpoch { + if update.updatedRangeLength() >= f.mapsPerEpoch { // limit 
the amount of data updated in a single batch f.applyUpdateBatch(update) update = f.newUpdateBatch() @@ -336,12 +336,12 @@ func (f *FilterMaps) pruneTailPtr(tailTarget uint64) { // pointers from the database. This function also updates targetLvPointer. func (f *FilterMaps) tryPruneTailMaps(tailTarget uint64, stopFn func() bool) { fmr := f.getRange() - tailMap := uint32(fmr.tailLvPointer >> logValuesPerMap) - targetMap := uint32(fmr.tailBlockLvPointer >> logValuesPerMap) + tailMap := uint32(fmr.tailLvPointer >> f.logValuesPerMap) + targetMap := uint32(fmr.tailBlockLvPointer >> f.logValuesPerMap) if tailMap >= targetMap { return } - lastEpoch := (targetMap - 1) >> logMapsPerEpoch + lastEpoch := (targetMap - 1) >> f.logMapsPerEpoch removeLvPtr, err := f.getMapBlockPtr(tailMap) if err != nil { log.Error("Error fetching tail map block pointer", "map index", tailMap, "error", err) @@ -352,12 +352,12 @@ func (f *FilterMaps) tryPruneTailMaps(tailTarget uint64, stopFn func() bool) { lastLogged time.Time ) for tailMap < targetMap && !stopFn() { - tailEpoch := tailMap >> logMapsPerEpoch + tailEpoch := tailMap >> f.logMapsPerEpoch if tailEpoch == lastEpoch { f.pruneMaps(tailMap, targetMap, &removeLvPtr) break } - nextTailMap := (tailEpoch + 1) << logMapsPerEpoch + nextTailMap := (tailEpoch + 1) << f.logMapsPerEpoch f.pruneMaps(tailMap, nextTailMap, &removeLvPtr) tailMap = nextTailMap if !logged || time.Since(lastLogged) >= time.Second*10 { @@ -386,13 +386,13 @@ func (f *FilterMaps) pruneMaps(first, afterLast uint32, removeLvPtr *uint64) { for mapIndex := first; mapIndex < afterLast; mapIndex++ { f.deleteMapBlockPtr(batch, mapIndex) } - for rowIndex := uint32(0); rowIndex < mapHeight; rowIndex++ { + for rowIndex := uint32(0); rowIndex < f.mapHeight; rowIndex++ { for mapIndex := first; mapIndex < afterLast; mapIndex++ { f.storeFilterMapRow(batch, mapIndex, rowIndex, emptyRow) } } fmr := f.getRange() - fmr.tailLvPointer = uint64(afterLast) << logValuesPerMap + fmr.tailLvPointer = 
uint64(afterLast) << f.logValuesPerMap if fmr.tailLvPointer > fmr.tailBlockLvPointer { log.Error("Cannot prune filter maps beyond tail block log value pointer", "tailLvPointer", fmr.tailLvPointer, "tailBlockLvPointer", fmr.tailBlockLvPointer) return @@ -407,11 +407,11 @@ func (f *FilterMaps) pruneMaps(first, afterLast uint32, removeLvPtr *uint64) { // that can be written to the database in a single batch while the in-memory // representations in FilterMaps are also updated. type updateBatch struct { + f *FilterMaps filterMapsRange - maps map[uint32]*filterMap // nil rows are unchanged - getFilterMapRow func(mapIndex, rowIndex uint32) (FilterRow, error) - blockLvPointer map[uint64]uint64 // removedPointer means delete - mapBlockPtr map[uint32]uint64 // removedPointer means delete + maps map[uint32]filterMap // nil rows are unchanged + blockLvPointer map[uint64]uint64 // removedPointer means delete + mapBlockPtr map[uint32]uint64 // removedPointer means delete revertPoints map[uint64]*revertPoint firstMap, afterLastMap uint32 } @@ -422,9 +422,9 @@ func (f *FilterMaps) newUpdateBatch() *updateBatch { defer f.lock.RUnlock() return &updateBatch{ + f: f, filterMapsRange: f.filterMapsRange, - maps: make(map[uint32]*filterMap), - getFilterMapRow: f.getFilterMapRow, + maps: make(map[uint32]filterMap), blockLvPointer: make(map[uint64]uint64), mapBlockPtr: make(map[uint32]uint64), revertPoints: make(map[uint64]*revertPoint), @@ -455,10 +455,10 @@ func (f *FilterMaps) applyUpdateBatch(u *updateBatch) { } } // write filter map rows - for rowIndex := uint32(0); rowIndex < mapHeight; rowIndex++ { + for rowIndex := uint32(0); rowIndex < f.mapHeight; rowIndex++ { for mapIndex := u.firstMap; mapIndex < u.afterLastMap; mapIndex++ { if fm := u.maps[mapIndex]; fm != nil { - if row := (*fm)[rowIndex]; row != nil { + if row := fm[rowIndex]; row != nil { f.storeFilterMapRow(batch, mapIndex, rowIndex, row) } } @@ -488,7 +488,7 @@ func (f *FilterMaps) applyUpdateBatch(u *updateBatch) { 
rawdb.WriteRevertPoint(batch, b, &rawdb.RevertPoint{ BlockHash: rp.blockHash, MapIndex: rp.mapIndex, - RowLength: rp.rowLength[:], + RowLength: rp.rowLength, }) } } @@ -507,7 +507,7 @@ func (u *updateBatch) updatedRangeLength() uint32 { // tailEpoch returns the tail epoch index. func (u *updateBatch) tailEpoch() uint32 { - return uint32(u.tailBlockLvPointer >> (logValuesPerMap + logMapsPerEpoch)) + return uint32(u.tailBlockLvPointer >> (u.f.logValuesPerMap + u.f.logMapsPerEpoch)) } // getRowPtr returns a pointer to a FilterRow that can be modified. If the batch @@ -517,7 +517,7 @@ func (u *updateBatch) tailEpoch() uint32 { func (u *updateBatch) getRowPtr(mapIndex, rowIndex uint32) (*FilterRow, error) { fm := u.maps[mapIndex] if fm == nil { - fm = new(filterMap) + fm = make(filterMap, u.f.mapHeight) u.maps[mapIndex] = fm if mapIndex < u.firstMap || u.afterLastMap == 0 { u.firstMap = mapIndex @@ -526,9 +526,9 @@ func (u *updateBatch) getRowPtr(mapIndex, rowIndex uint32) (*FilterRow, error) { u.afterLastMap = mapIndex + 1 } } - rowPtr := &(*fm)[rowIndex] + rowPtr := &fm[rowIndex] if *rowPtr == nil { - if filterRow, err := u.getFilterMapRow(mapIndex, rowIndex); err == nil { + if filterRow, err := u.f.getFilterMapRow(mapIndex, rowIndex); err == nil { // filterRow is read only, copy before write *rowPtr = make(FilterRow, len(filterRow), len(filterRow)+8) copy(*rowPtr, filterRow) @@ -545,6 +545,7 @@ func (u *updateBatch) initWithBlock(header *types.Header, receipts types.Receipt return errors.New("already initialized") } u.initialized = true + startLvPointer := uint64(startLvMap) << u.f.logValuesPerMap u.headLvPointer, u.tailLvPointer, u.tailBlockLvPointer = startLvPointer, startLvPointer, startLvPointer u.headBlockNumber, u.tailBlockNumber = header.Number.Uint64()-1, header.Number.Uint64() u.headBlockHash, u.tailParentHash = header.ParentHash, header.ParentHash @@ -554,12 +555,12 @@ func (u *updateBatch) initWithBlock(header *types.Header, receipts types.Receipt // 
addValueToHead adds a single log value to the head of the log index. func (u *updateBatch) addValueToHead(logValue common.Hash) error { - mapIndex := uint32(u.headLvPointer >> logValuesPerMap) - rowPtr, err := u.getRowPtr(mapIndex, rowIndex(mapIndex>>logMapsPerEpoch, logValue)) + mapIndex := uint32(u.headLvPointer >> u.f.logValuesPerMap) + rowPtr, err := u.getRowPtr(mapIndex, u.f.rowIndex(mapIndex>>u.f.logMapsPerEpoch, logValue)) if err != nil { return err } - column := columnIndex(u.headLvPointer, logValue) + column := u.f.columnIndex(u.headLvPointer, logValue) *rowPtr = append(*rowPtr, column) u.headLvPointer++ return nil @@ -577,11 +578,11 @@ func (u *updateBatch) addBlockToHead(header *types.Header, receipts types.Receip } number := header.Number.Uint64() u.blockLvPointer[number] = u.headLvPointer - startMap := uint32((u.headLvPointer + valuesPerMap - 1) >> logValuesPerMap) + startMap := uint32((u.headLvPointer + u.f.valuesPerMap - 1) >> u.f.logValuesPerMap) if err := iterateReceipts(receipts, u.addValueToHead); err != nil { return err } - stopMap := uint32((u.headLvPointer + valuesPerMap - 1) >> logValuesPerMap) + stopMap := uint32((u.headLvPointer + u.f.valuesPerMap - 1) >> u.f.logValuesPerMap) for m := startMap; m < stopMap; m++ { u.mapBlockPtr[m] = number } @@ -610,12 +611,12 @@ func (u *updateBatch) addValueToTail(logValue common.Hash) error { return nil // already added to the map } u.tailLvPointer-- - mapIndex := uint32(u.tailBlockLvPointer >> logValuesPerMap) - rowPtr, err := u.getRowPtr(mapIndex, rowIndex(mapIndex>>logMapsPerEpoch, logValue)) + mapIndex := uint32(u.tailBlockLvPointer >> u.f.logValuesPerMap) + rowPtr, err := u.getRowPtr(mapIndex, u.f.rowIndex(mapIndex>>u.f.logMapsPerEpoch, logValue)) if err != nil { return err } - column := columnIndex(u.tailBlockLvPointer, logValue) + column := u.f.columnIndex(u.tailBlockLvPointer, logValue) *rowPtr = append(*rowPtr, 0) copy((*rowPtr)[1:], (*rowPtr)[:len(*rowPtr)-1]) (*rowPtr)[0] = column @@ -632,7 
+633,7 @@ func (u *updateBatch) addBlockToTail(header *types.Header, receipts types.Receip return errors.New("addBlockToTail parent mismatch") } number := header.Number.Uint64() - stopMap := uint32((u.tailLvPointer + valuesPerMap - 1) >> logValuesPerMap) + stopMap := uint32((u.tailLvPointer + u.f.valuesPerMap - 1) >> u.f.logValuesPerMap) var cnt int if err := iterateReceiptsReverse(receipts, func(lv common.Hash) error { cnt++ @@ -640,7 +641,7 @@ func (u *updateBatch) addBlockToTail(header *types.Header, receipts types.Receip }); err != nil { return err } - startMap := uint32(u.tailLvPointer >> logValuesPerMap) + startMap := uint32(u.tailLvPointer >> u.f.logValuesPerMap) for m := startMap; m < stopMap; m++ { u.mapBlockPtr[m] = number } @@ -693,7 +694,7 @@ type revertPoint struct { blockNumber uint64 blockHash common.Hash mapIndex uint32 - rowLength [mapHeight]uint + rowLength []uint } // makeRevertPoint creates a new revertPoint. @@ -701,19 +702,20 @@ func (u *updateBatch) makeRevertPoint() (*revertPoint, error) { rp := &revertPoint{ blockNumber: u.headBlockNumber, blockHash: u.headBlockHash, - mapIndex: uint32(u.headLvPointer >> logValuesPerMap), + mapIndex: uint32(u.headLvPointer >> u.f.logValuesPerMap), + rowLength: make([]uint, u.f.mapHeight), } - if u.tailLvPointer > uint64(rp.mapIndex)< uint64(rp.mapIndex)<> logValuesPerMap) + afterLastMap := uint32((f.headLvPointer + f.valuesPerMap - 1) >> f.logValuesPerMap) if rp.mapIndex >= afterLastMap { return errors.New("cannot revert (head map behind revert point)") } - lvPointer := uint64(rp.mapIndex) << logValuesPerMap - for rowIndex, rowLen := range rp.rowLength[:] { + lvPointer := uint64(rp.mapIndex) << f.logValuesPerMap + for rowIndex, rowLen := range rp.rowLength { rowIndex := uint32(rowIndex) row, err := f.getFilterMapRow(rp.mapIndex, rowIndex) if err != nil { diff --git a/core/filtermaps/matcher.go b/core/filtermaps/matcher.go index 02fb6ed82d34..64c7c5efe6ed 100644 --- a/core/filtermaps/matcher.go +++ 
b/core/filtermaps/matcher.go @@ -21,6 +21,7 @@ var ErrMatchAll = errors.New("match all patterns not supported") // once EIP-7745 is implemented and active, these functions can also be trustlessly // served by a remote prover. type MatcherBackend interface { + GetParams() *Params GetBlockLvPointer(ctx context.Context, blockNumber uint64) (uint64, error) GetFilterMapRow(ctx context.Context, mapIndex, rowIndex uint32) (FilterRow, error) GetLogByLvIndex(ctx context.Context, lvIndex uint64) (*types.Log, error) @@ -139,6 +140,7 @@ func GetPotentialMatches(ctx context.Context, backend MatcherBackend, firstBlock // to that block range might be missing or incorrect. // Also note that the returned list may contain false positives. func getPotentialMatches(ctx context.Context, backend MatcherBackend, firstBlock, lastBlock uint64, addresses []common.Address, topics [][]common.Hash) ([]*types.Log, error) { + params := backend.GetParams() // find the log value index range to search firstIndex, err := backend.GetBlockLvPointer(ctx, firstBlock) if err != nil { @@ -151,8 +153,8 @@ func getPotentialMatches(ctx context.Context, backend MatcherBackend, firstBlock if lastIndex > 0 { lastIndex-- } - firstMap, lastMap := uint32(firstIndex>>logValuesPerMap), uint32(lastIndex>>logValuesPerMap) - firstEpoch, lastEpoch := firstMap>>logMapsPerEpoch, lastMap>>logMapsPerEpoch + firstMap, lastMap := uint32(firstIndex>>params.logValuesPerMap), uint32(lastIndex>>params.logValuesPerMap) + firstEpoch, lastEpoch := firstMap>>params.logMapsPerEpoch, lastMap>>params.logMapsPerEpoch // build matcher according to the given filter criteria matchers := make([]matcher, len(topics)+1) @@ -178,13 +180,13 @@ func getPotentialMatches(ctx context.Context, backend MatcherBackend, firstBlock } // matcher is the final sequence matcher that signals a match when all underlying // matchers signal a match for consecutive log value indices. 
- matcher := newMatchSequence(matchers) + matcher := newMatchSequence(params, matchers) // processEpoch returns the potentially matching logs from the given epoch. processEpoch := func(epochIndex uint32) ([]*types.Log, error) { var logs []*types.Log // create a list of map indices to process - fm, lm := epochIndex<>logMapsPerEpoch, s.value)) + filterRow, err := s.backend.GetFilterMapRow(ctx, mapIndex, params.rowIndex(mapIndex>>params.logMapsPerEpoch, s.value)) if err != nil { return nil, err } - results[i] = filterRow.potentialMatches(mapIndex, s.value) + results[i] = params.potentialMatches(filterRow, mapIndex, s.value) } return results, nil } @@ -403,6 +406,7 @@ func mergeResults(results []potentialMatches) potentialMatches { // gives a match at X+offset. Note that matchSequence can be used recursively to // detect any log value sequence. type matchSequence struct { + params *Params base, next matcher offset uint64 // *EmptyRate == totalCount << 32 + emptyCount (atomically accessed) @@ -412,7 +416,7 @@ type matchSequence struct { // newMatchSequence creates a recursive sequence matcher from a list of underlying // matchers. The resulting matcher signals a match at log value index X when each // underlying matcher matchers[i] returns a match at X+i. 
-func newMatchSequence(matchers []matcher) matcher { +func newMatchSequence(params *Params, matchers []matcher) matcher { if len(matchers) == 0 { panic("zero length sequence matchers are not allowed") } @@ -420,7 +424,8 @@ func newMatchSequence(matchers []matcher) matcher { return matchers[0] } return &matchSequence{ - base: newMatchSequence(matchers[:len(matchers)-1]), + params: params, + base: newMatchSequence(params, matchers[:len(matchers)-1]), next: matchers[len(matchers)-1], offset: uint64(len(matchers) - 1), } @@ -461,7 +466,7 @@ func (m *matchSequence) getMatches(ctx context.Context, mapIndices []uint32) ([] nextIndices = append(nextIndices, mapIndex) lastAdded = mapIndex } - if !baseFirst || baseRes[i] == nil || baseRes[i][len(baseRes[i])-1] >= (uint64(mapIndex+1)<= (uint64(mapIndex+1)< 0 && next1[len(next1)-1] >= (uint64(mapIndex)< 0 && next2[0] < (uint64(mapIndex+1)< 0 && next1[len(next1)-1] >= (uint64(mapIndex)< 0 && next2[0] < (uint64(mapIndex+1)< f.headBlockNumber { - fm.lastValid = f.headBlockNumber - } - if fm.firstValid > fm.lastValid { - fm.valid = false - } - } +// GetParams returns the filtermaps parameters. +// GetParams implements MatcherBackend. +func (fm *FilterMapsMatcherBackend) GetParams() *Params { + return &fm.f.Params } // Close removes the matcher from the set of active matchers and ensures that // any SyncLogIndex calls are cancelled. +// Close implements MatcherBackend. func (fm *FilterMapsMatcherBackend) Close() { fm.f.lock.Lock() defer fm.f.lock.Unlock() @@ -156,3 +139,27 @@ func (fm *FilterMapsMatcherBackend) SyncLogIndex(ctx context.Context) (SyncRange return SyncRange{}, ctx.Err() } } + +// updateMatchersValidRange iterates through active matchers and limits their +// valid range with the current indexed range. This function should be called +// whenever a part of the log index has been removed, before adding new blocks +// to it. 
+func (f *FilterMaps) updateMatchersValidRange() { + for fm := range f.matchers { + if !f.initialized { + fm.valid = false + } + if !fm.valid { + continue + } + if fm.firstValid < f.tailBlockNumber { + fm.firstValid = f.tailBlockNumber + } + if fm.lastValid > f.headBlockNumber { + fm.lastValid = f.headBlockNumber + } + if fm.firstValid > fm.lastValid { + fm.valid = false + } + } +} diff --git a/core/filtermaps/math.go b/core/filtermaps/math.go new file mode 100644 index 000000000000..b0132df9130d --- /dev/null +++ b/core/filtermaps/math.go @@ -0,0 +1,180 @@ +package filtermaps + +import ( + "crypto/sha256" + "encoding/binary" + "sort" + + "github.com/ethereum/go-ethereum/common" +) + +type Params struct { + logMapHeight uint // log2(mapHeight) + logMapsPerEpoch uint // log2(mmapsPerEpochapsPerEpoch) + logValuesPerMap uint // log2(logValuesPerMap) + // derived fields + mapHeight uint32 // filter map height (number of rows) + mapsPerEpoch uint32 // number of maps in an epoch + valuesPerMap uint64 // number of log values marked on each filter map +} + +var DefaultParams = Params{ + logMapHeight: 12, + logMapsPerEpoch: 6, + logValuesPerMap: 16, +} + +func (p *Params) deriveFields() { + p.mapHeight = uint32(1) << p.logMapHeight + p.mapsPerEpoch = uint32(1) << p.logMapsPerEpoch + p.valuesPerMap = uint64(1) << p.logValuesPerMap +} + +// addressValue returns the log value hash of a log emitting address. +func addressValue(address common.Address) common.Hash { + var result common.Hash + hasher := sha256.New() + hasher.Write(address[:]) + hasher.Sum(result[:0]) + return result +} + +// topicValue returns the log value hash of a log topic. +func topicValue(topic common.Hash) common.Hash { + var result common.Hash + hasher := sha256.New() + hasher.Write(topic[:]) + hasher.Sum(result[:0]) + return result +} + +// rowIndex returns the row index in which the given log value should be marked +// during the given epoch. 
Note that row assignments are re-shuffled in every +// epoch in order to ensure that even though there are always a few more heavily +// used rows due to very popular addresses and topics, these will not make search +// for other log values very expensive. Even if certain values are occasionally +// sorted into these heavy rows, in most of the epochs they are placed in average +// length rows. +func (p *Params) rowIndex(epochIndex uint32, logValue common.Hash) uint32 { + hasher := sha256.New() + hasher.Write(logValue[:]) + var indexEnc [4]byte + binary.LittleEndian.PutUint32(indexEnc[:], epochIndex) + hasher.Write(indexEnc[:]) + var hash common.Hash + hasher.Sum(hash[:0]) + return binary.LittleEndian.Uint32(hash[:4]) % p.mapHeight +} + +// columnIndex returns the column index that should be added to the appropriate +// row in order to place a mark for the next log value. +func (p *Params) columnIndex(lvIndex uint64, logValue common.Hash) uint32 { + x := uint32(lvIndex % p.valuesPerMap) // log value sub-index + transformHash := transformHash(uint32(lvIndex/p.valuesPerMap), logValue) + // apply column index transformation function + x += binary.LittleEndian.Uint32(transformHash[0:4]) + x *= binary.LittleEndian.Uint32(transformHash[4:8])*2 + 1 + x ^= binary.LittleEndian.Uint32(transformHash[8:12]) + x *= binary.LittleEndian.Uint32(transformHash[12:16])*2 + 1 + x += binary.LittleEndian.Uint32(transformHash[16:20]) + x *= binary.LittleEndian.Uint32(transformHash[20:24])*2 + 1 + x ^= binary.LittleEndian.Uint32(transformHash[24:28]) + x *= binary.LittleEndian.Uint32(transformHash[28:32])*2 + 1 + return x +} + +// transformHash calculates a hash specific to a given map and log value hash +// that defines a bijective function on the uint32 range. This function is used +// to transform the log value sub-index (distance from the first index of the map) +// into a 32 bit column index, then applied in reverse when searching for potential +// matches for a given log value. 
+func transformHash(mapIndex uint32, logValue common.Hash) (result common.Hash) { + hasher := sha256.New() + hasher.Write(logValue[:]) + var indexEnc [4]byte + binary.LittleEndian.PutUint32(indexEnc[:], mapIndex) + hasher.Write(indexEnc[:]) + hasher.Sum(result[:0]) + return +} + +// potentialMatches returns the list of log value indices potentially matching +// the given log value hash in the range of the filter map the row belongs to. +// Note that the list of indices is always sorted and potential duplicates are +// removed. Though the column indices are stored in the same order they were +// added and therefore the true matches are automatically reverse transformed +// in the right order, false positives can ruin this property. Since these can +// only be separated from true matches after the combined pattern matching of the +// outputs of individual log value matchers and this pattern matcher assumes a +// sorted and duplicate-free list of indices, we should ensure these properties +// here. +func (p *Params) potentialMatches(row FilterRow, mapIndex uint32, logValue common.Hash) potentialMatches { + results := make(potentialMatches, 0, 8) + transformHash := transformHash(mapIndex, logValue) + sub1 := binary.LittleEndian.Uint32(transformHash[0:4]) + mul1 := uint32ModInverse(binary.LittleEndian.Uint32(transformHash[4:8])*2 + 1) + xor1 := binary.LittleEndian.Uint32(transformHash[8:12]) + mul2 := uint32ModInverse(binary.LittleEndian.Uint32(transformHash[12:16])*2 + 1) + sub2 := binary.LittleEndian.Uint32(transformHash[16:20]) + mul3 := uint32ModInverse(binary.LittleEndian.Uint32(transformHash[20:24])*2 + 1) + xor2 := binary.LittleEndian.Uint32(transformHash[24:28]) + mul4 := uint32ModInverse(binary.LittleEndian.Uint32(transformHash[28:32])*2 + 1) + // perform reverse column index transformation on all column indices of the row. 
+ // if a column index was added by the searched log value then the reverse + // transform will yield a valid log value sub-index of the given map. + // Column index is 32 bits long while there are 2**16 valid log value indices + // in the map's range, so this can also happen by accident with 1 in 2**16 + // chance, in which case we have a false positive. + for _, columnIndex := range row { + if potentialSubIndex := (((((((columnIndex * mul4) ^ xor2) * mul3) - sub2) * mul2) ^ xor1) * mul1) - sub1; potentialSubIndex < uint32(p.valuesPerMap) { + results = append(results, uint64(mapIndex)< 1 { + q := a / m + m, a = a%m, m + x, y = y, x-q*y + } + if x < 0 { + x += m0 + } + return uint32(x) +} diff --git a/core/filtermaps/filtermaps_test.go b/core/filtermaps/math_test.go similarity index 78% rename from core/filtermaps/filtermaps_test.go rename to core/filtermaps/math_test.go index 70a4ce3b14ca..5cf76dd34a92 100644 --- a/core/filtermaps/filtermaps_test.go +++ b/core/filtermaps/math_test.go @@ -8,14 +8,17 @@ import ( ) func TestSingleMatch(t *testing.T) { + params := DefaultParams + params.deriveFields() + for count := 0; count < 100000; count++ { // generate a row with a single random entry mapIndex := rand.Uint32() - lvIndex := uint64(mapIndex)<> 32) if falsePositives < expFalse/2 || falsePositives > expFalse*3/2 { t.Fatalf("False positive rate out of expected range (got %d, expected %d +-50%%)", falsePositives, expFalse) } diff --git a/eth/backend.go b/eth/backend.go index db7dbfd034e3..0d1439828979 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -214,7 +214,7 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) { if err != nil { return nil, err } - eth.filterMaps = filtermaps.NewFilterMaps(chainDb, eth.blockchain, config.LogHistory, config.LogNoHistory) + eth.filterMaps = filtermaps.NewFilterMaps(chainDb, eth.blockchain, filtermaps.DefaultParams, config.LogHistory, config.LogNoHistory) if config.BlobPool.Datadir != "" { 
config.BlobPool.Datadir = stack.ResolvePath(config.BlobPool.Datadir) From b73ed9c1b76a061f57dc9d870021139743226ecd Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Sun, 29 Sep 2024 10:38:49 +0200 Subject: [PATCH 10/23] core/filtermaps: add indexer test --- core/filtermaps/filtermaps.go | 21 ++- core/filtermaps/indexer.go | 50 +++++- core/filtermaps/indexer_test.go | 280 ++++++++++++++++++++++++++++++++ eth/backend.go | 5 +- 4 files changed, 339 insertions(+), 17 deletions(-) create mode 100644 core/filtermaps/indexer_test.go diff --git a/core/filtermaps/filtermaps.go b/core/filtermaps/filtermaps.go index 600145fdcad5..75ad51ada9c1 100644 --- a/core/filtermaps/filtermaps.go +++ b/core/filtermaps/filtermaps.go @@ -23,6 +23,7 @@ type blockchain interface { SubscribeChainHeadEvent(ch chan<- core.ChainHeadEvent) event.Subscription GetHeader(hash common.Hash, number uint64) *types.Header GetCanonicalHash(number uint64) common.Hash + GetReceiptsByHash(hash common.Hash) types.Receipts } // FilterMaps is the in-memory representation of the log index structure that is @@ -33,7 +34,7 @@ type blockchain interface { // https://eips.ethereum.org/EIPS/eip-7745 type FilterMaps struct { lock sync.RWMutex - db ethdb.Database + db ethdb.KeyValueStore closeCh chan struct{} closeWg sync.WaitGroup history uint64 @@ -53,6 +54,8 @@ type FilterMaps struct { blockPtrCache *lru.Cache[uint32, uint64] lvPointerCache *lru.Cache[uint64, uint64] revertPoints map[uint64]*revertPoint + + testHook func(int) } // filterMap is a full or partial in-memory representation of a filter map where @@ -94,7 +97,7 @@ type filterMapsRange struct { // NewFilterMaps creates a new FilterMaps and starts the indexer in order to keep // the structure in sync with the given blockchain. 
-func NewFilterMaps(db ethdb.Database, chain blockchain, params Params, history uint64, noHistory bool) *FilterMaps { +func NewFilterMaps(db ethdb.KeyValueStore, chain blockchain, params Params, history uint64, noHistory bool) *FilterMaps { rs, err := rawdb.ReadFilterMapsRange(db) if err != nil { log.Error("Error reading log index range", "error", err) @@ -128,14 +131,17 @@ func NewFilterMaps(db ethdb.Database, chain blockchain, params Params, history u log.Error("Error fetching tail block pointer, resetting log index", "error", err) fm.filterMapsRange = filterMapsRange{} // updateLoop resets the database } - fm.closeWg.Add(2) - go fm.removeBloomBits() - go fm.updateLoop() return fm } +func (f *FilterMaps) Start() { + f.closeWg.Add(2) + go f.removeBloomBits() + go f.updateLoop() +} + // Close ensures that the indexer is fully stopped before returning. -func (f *FilterMaps) Close() { +func (f *FilterMaps) Stop() { close(f.closeCh) f.closeWg.Wait() } @@ -297,8 +303,7 @@ func (f *FilterMaps) getLogByLvIndex(lvIndex uint64) (*types.Log, error) { } } // get block receipts - hash := f.chain.GetCanonicalHash(firstBlockNumber) - receipts := rawdb.ReadRawReceipts(f.db, hash, firstBlockNumber) //TODO small cache + receipts := f.chain.GetReceiptsByHash(f.chain.GetCanonicalHash(firstBlockNumber)) if receipts == nil { return nil, errors.New("receipts not found") } diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go index 6d94e10b45fb..c5a0fbb48571 100644 --- a/core/filtermaps/indexer.go +++ b/core/filtermaps/indexer.go @@ -22,7 +22,12 @@ const ( // updateLoop initializes and updates the log index structure according to the // canonical chain. 
func (f *FilterMaps) updateLoop() { - defer f.closeWg.Done() + defer func() { + f.closeWg.Done() + if f.testHook != nil { + f.testHook(testHookStop) + } + }() if f.noHistory { f.reset() @@ -38,7 +43,7 @@ func (f *FilterMaps) updateLoop() { var ( headEventCh = make(chan core.ChainHeadEvent, 10) sub = f.chain.SubscribeChainHeadEvent(headEventCh) - head *types.Header + head = f.chain.CurrentBlock() stop bool syncMatcher *FilterMapsMatcherBackend ) @@ -59,16 +64,21 @@ func (f *FilterMaps) updateLoop() { if stop { return } + delay := time.Second * 20 + if f.testHook != nil { + f.testHook(testHookWait) + delay = 0 + } select { case ev := <-headEventCh: head = ev.Block.Header() case syncMatcher = <-f.matcherSyncCh: head = f.chain.CurrentBlock() - case <-time.After(time.Second * 20): - // keep updating log index during syncing - head = f.chain.CurrentBlock() case <-f.closeCh: stop = true + case <-time.After(delay): + // keep updating log index during syncing + head = f.chain.CurrentBlock() } } for head == nil { @@ -151,7 +161,7 @@ func (f *FilterMaps) tryInit(head *types.Header) bool { if !f.reset() { return false } - receipts := rawdb.ReadRawReceipts(f.db, head.Hash(), head.Number.Uint64()) + receipts := f.chain.GetReceiptsByHash(head.Hash()) if receipts == nil { log.Error("Could not retrieve block receipts for init block", "number", head.Number, "hash", head.Hash()) return true @@ -161,6 +171,9 @@ func (f *FilterMaps) tryInit(head *types.Header) bool { log.Error("Could not initialize log index", "error", err) } f.applyUpdateBatch(update) + if f.testHook != nil { + f.testHook(testHookInit) + } return true } @@ -222,7 +235,7 @@ func (f *FilterMaps) tryUpdateHead(newHead *types.Header) bool { update := f.newUpdateBatch() for i := len(newHeaders) - 1; i >= 0; i-- { newHeader := newHeaders[i] - receipts := rawdb.ReadRawReceipts(f.db, newHeader.Hash(), newHeader.Number.Uint64()) + receipts := f.chain.GetReceiptsByHash(newHeader.Hash()) if receipts == nil { log.Error("Could not 
retrieve block receipts for new block", "number", newHeader.Number, "hash", newHeader.Hash()) break @@ -234,10 +247,16 @@ func (f *FilterMaps) tryUpdateHead(newHead *types.Header) bool { if update.updatedRangeLength() >= f.mapsPerEpoch { // limit the amount of data updated in a single batch f.applyUpdateBatch(update) + if f.testHook != nil { + f.testHook(testHookUpdateHeadEpoch) + } update = f.newUpdateBatch() } } f.applyUpdateBatch(update) + if f.testHook != nil { + f.testHook(testHookUpdateHead) + } return true } @@ -273,6 +292,9 @@ func (f *FilterMaps) tryExtendTail(tailTarget uint64, stopFn func() bool) { if tailEpoch := update.tailEpoch(); tailEpoch < lastTailEpoch { // limit the amount of data updated in a single batch f.applyUpdateBatch(update) + if f.testHook != nil { + f.testHook(testHookExtendTailEpoch) + } update = f.newUpdateBatch() lastTailEpoch = tailEpoch } @@ -281,7 +303,7 @@ func (f *FilterMaps) tryExtendTail(tailTarget uint64, stopFn func() bool) { log.Error("Tail header not found", "number", number-1, "hash", parentHash) break } - receipts := rawdb.ReadRawReceipts(f.db, newTail.Hash(), newTail.Number.Uint64()) + receipts := f.chain.GetReceiptsByHash(newTail.Hash()) if receipts == nil { log.Error("Could not retrieve block receipts for tail block", "number", newTail.Number, "hash", newTail.Hash()) break @@ -293,6 +315,9 @@ func (f *FilterMaps) tryExtendTail(tailTarget uint64, stopFn func() bool) { number, parentHash = newTail.Number.Uint64(), newTail.ParentHash } f.applyUpdateBatch(update) + if f.testHook != nil { + f.testHook(testHookExtendTail) + } } // pruneTailPtr updates the tail block number and hash and the corresponding @@ -330,6 +355,9 @@ func (f *FilterMaps) pruneTailPtr(tailTarget uint64) { fmr.tailBlockNumber, fmr.tailParentHash = tailTarget, tailParentHash fmr.tailBlockLvPointer = targetLvPointer f.setRange(f.db, fmr) + if f.testHook != nil { + f.testHook(testHookPruneTail) + } } // tryPruneTailMaps removes unused filter maps and 
corresponding log index @@ -401,6 +429,9 @@ func (f *FilterMaps) pruneMaps(first, afterLast uint32, removeLvPtr *uint64) { if err := batch.Write(); err != nil { log.Crit("Could not write update batch", "error", err) } + if f.testHook != nil { + f.testHook(testHookPruneTailMaps) + } } // updateBatch is a memory overlay collecting changes to the index log structure @@ -799,5 +830,8 @@ func (f *FilterMaps) revertTo(rp *revertPoint) error { if err := batch.Write(); err != nil { log.Crit("Could not write update batch", "error", err) } + if f.testHook != nil { + f.testHook(testHookRevert) + } return nil } diff --git a/core/filtermaps/indexer_test.go b/core/filtermaps/indexer_test.go new file mode 100644 index 000000000000..5fd3f12ce57e --- /dev/null +++ b/core/filtermaps/indexer_test.go @@ -0,0 +1,280 @@ +package filtermaps + +import ( + "math/big" + "math/rand" + "sync" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/consensus/ethash" + "github.com/ethereum/go-ethereum/core" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/event" + "github.com/ethereum/go-ethereum/params" +) + +const ( + testHookInit = iota + testHookUpdateHeadEpoch + testHookUpdateHead + testHookExtendTailEpoch + testHookExtendTail + testHookPruneTail + testHookPruneTailMaps + testHookRevert + testHookWait + testHookStop +) + +var testParams = Params{ + logMapHeight: 2, + logMapsPerEpoch: 4, + logValuesPerMap: 4, +} + +func TestIndexerSetHistory(t *testing.T) { + ts := newTestSetup(t) + ts.setHistory(0, false) + ts.chain.addBlocks(1000, 5, 2, 4, false) // 50 log values per block + ts.runUntilWait() + ts.setHistory(100, false) + ts.runUntil(func() bool { + l := ts.lastRange.headLvPointer - ts.lastRange.tailLvPointer + return l > 44000 && l < 45000 + }) + ts.setHistory(200, false) + ts.runUntilWait() + ts.setHistory(0, false) + 
ts.runUntilWait() + if ts.lastRange.headLvPointer-ts.lastRange.tailLvPointer != 50000 { + t.Fatalf("Invalid number of log values in the final state (expected %d, got %d)", 50000, ts.lastRange.headLvPointer-ts.lastRange.tailLvPointer) + } +} + +func TestIndexerRandomSetHistory(t *testing.T) { + ts := newTestSetup(t) + ts.chain.addBlocks(100, 5, 2, 4, false) // 50 log values per block + for i := 0; i < 3000; i++ { + ts.setHistory(uint64(rand.Intn(1001)), false) + ts.nextEvent() + for rand.Intn(20) != 0 && ts.lastEvent != testHookWait { + ts.nextEvent() + } + } + ts.setHistory(0, false) + ts.runUntilWait() + if ts.lastRange.headLvPointer-ts.lastRange.tailLvPointer != 5000 { + t.Fatalf("Invalid number of log values in the final state (expected %d, got %d)", 5000, ts.lastRange.headLvPointer-ts.lastRange.tailLvPointer) + } +} + +type testSetup struct { + t *testing.T + fm *FilterMaps + db ethdb.Database + chain *testChain + eventCh chan int + resumeCh chan struct{} + lastEvent int + lastRange filterMapsRange +} + +func newTestSetup(t *testing.T) *testSetup { + return &testSetup{ + t: t, + chain: newTestChain(), + db: rawdb.NewMemoryDatabase(), + eventCh: make(chan int), + resumeCh: make(chan struct{}), + } +} + +func (ts *testSetup) runUntil(stop func() bool) { + for !stop() { + ts.nextEvent() + for ts.lastEvent == testHookWait { + ts.t.Fatalf("Indexer in waiting state before runUntil condition is met") + } + } +} + +func (ts *testSetup) runUntilWait() { + ts.nextEvent() + for ts.lastEvent != testHookWait { + ts.nextEvent() + } +} + +func (ts *testSetup) setHistory(history uint64, noHistory bool) { + if ts.fm != nil { + ts.stopFm() + } + ts.fm = NewFilterMaps(ts.db, ts.chain, testParams, history, noHistory) + ts.fm.testHook = ts.testHook + ts.fm.Start() + ts.lastEvent = <-ts.eventCh +} + +func (ts *testSetup) testHook(event int) { + ts.eventCh <- event + <-ts.resumeCh +} + +func (ts *testSetup) nextEvent() { + ts.resumeCh <- struct{}{} + ts.lastEvent = <-ts.eventCh + 
ts.lastRange = ts.fm.getRange() +} + +func (ts *testSetup) stopFm() { + close(ts.fm.closeCh) + for { + ts.nextEvent() + if ts.lastEvent == testHookStop { + break + } + } + ts.resumeCh <- struct{}{} + ts.fm.closeWg.Wait() +} + +func (ts *testSetup) close() { + ts.stopFm() + ts.db.Close() + ts.chain.db.Close() +} + +type testChain struct { + db ethdb.Database + lock sync.RWMutex + canonical []common.Hash + chainHeadFeed event.Feed + blocks map[common.Hash]*types.Block + receipts map[common.Hash]types.Receipts +} + +func newTestChain() *testChain { + return &testChain{ + blocks: make(map[common.Hash]*types.Block), + receipts: make(map[common.Hash]types.Receipts), + } +} + +func (tc *testChain) CurrentBlock() *types.Header { + tc.lock.RLock() + defer tc.lock.RUnlock() + + if len(tc.canonical) == 0 { + return nil + } + return tc.blocks[tc.canonical[len(tc.canonical)-1]].Header() +} + +func (tc *testChain) SubscribeChainHeadEvent(ch chan<- core.ChainHeadEvent) event.Subscription { + return tc.chainHeadFeed.Subscribe(ch) +} + +func (tc *testChain) GetHeader(hash common.Hash, number uint64) *types.Header { + tc.lock.RLock() + defer tc.lock.RUnlock() + + return tc.blocks[hash].Header() +} + +func (tc *testChain) GetCanonicalHash(number uint64) common.Hash { + tc.lock.RLock() + defer tc.lock.RUnlock() + + if uint64(len(tc.canonical)) <= number { + return common.Hash{} + } + return tc.canonical[number] +} + +func (tc *testChain) GetReceiptsByHash(hash common.Hash) types.Receipts { + tc.lock.RLock() + defer tc.lock.RUnlock() + + return tc.receipts[hash] +} + +func (tc *testChain) addBlocks(count, maxTxPerBlock, maxLogsPerReceipt, maxTopicsPerLog int, random bool) { + tc.lock.Lock() + defer tc.lock.Unlock() + + blockGen := func(i int, gen *core.BlockGen) { + var txCount int + if random { + txCount = rand.Intn(maxTxPerBlock + 1) + } else { + txCount = maxTxPerBlock + } + for k := txCount; k > 0; k-- { + receipt := types.NewReceipt(nil, false, 0) + var logCount int + if random { 
+ logCount = rand.Intn(maxLogsPerReceipt + 1) + } else { + logCount = maxLogsPerReceipt + } + receipt.Logs = make([]*types.Log, logCount) + for i := range receipt.Logs { + log := &types.Log{} + receipt.Logs[i] = log + rand.Read(log.Address[:]) + var topicCount int + if random { + topicCount = rand.Intn(maxTopicsPerLog + 1) + } else { + topicCount = maxTopicsPerLog + } + log.Topics = make([]common.Hash, topicCount) + for j := range log.Topics { + rand.Read(log.Topics[j][:]) + } + } + gen.AddUncheckedReceipt(receipt) + gen.AddUncheckedTx(types.NewTransaction(999, common.HexToAddress("0x999"), big.NewInt(999), 999, gen.BaseFee(), nil)) + } + } + + var ( + blocks []*types.Block + receipts []types.Receipts + engine = ethash.NewFaker() + ) + + if len(tc.canonical) == 0 { + gspec := &core.Genesis{ + Alloc: types.GenesisAlloc{}, + BaseFee: big.NewInt(params.InitialBaseFee), + Config: params.TestChainConfig, + } + tc.db, blocks, receipts = core.GenerateChainWithGenesis(gspec, engine, count, blockGen) + gblock := gspec.ToBlock() + ghash := gblock.Hash() + tc.canonical = []common.Hash{ghash} + tc.blocks[ghash] = gblock + tc.receipts[ghash] = types.Receipts{} + } else { + blocks, receipts = core.GenerateChain(params.TestChainConfig, tc.blocks[tc.canonical[len(tc.canonical)-1]], engine, tc.db, count, blockGen) + } + + for i, block := range blocks { + num, hash := int(block.NumberU64()), block.Hash() + if len(tc.canonical) != num { + panic(nil) + } + tc.canonical = append(tc.canonical, hash) + tc.blocks[hash] = block + if receipts[i] != nil { + tc.receipts[hash] = receipts[i] + } else { + tc.receipts[hash] = types.Receipts{} + } + } + tc.chainHeadFeed.Send(core.ChainHeadEvent{Block: tc.blocks[tc.canonical[len(tc.canonical)-1]]}) +} diff --git a/eth/backend.go b/eth/backend.go index 0d1439828979..f1db95803ff1 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -358,6 +358,9 @@ func (s *Ethereum) Start() error { // Start the networking layer s.handler.Start(s.p2pServer.MaxPeers) + 
+ // start log indexer + s.filterMaps.Start() return nil } @@ -401,7 +404,7 @@ func (s *Ethereum) Stop() error { s.handler.Stop() // Then stop everything else. - s.filterMaps.Close() + s.filterMaps.Stop() s.txPool.Close() s.blockchain.Stop() s.engine.Close() From db83e033c27cd790bc94c9a84816020421a45987 Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Mon, 30 Sep 2024 02:37:43 +0200 Subject: [PATCH 11/23] core/filtermaps: fixed tail pointer bug, added more failing checks --- core/filtermaps/indexer.go | 6 +++--- core/filtermaps/indexer_test.go | 32 +++++++++++++++++++++++++------- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go index c5a0fbb48571..e41222ebcdde 100644 --- a/core/filtermaps/indexer.go +++ b/core/filtermaps/indexer.go @@ -664,7 +664,7 @@ func (u *updateBatch) addBlockToTail(header *types.Header, receipts types.Receip return errors.New("addBlockToTail parent mismatch") } number := header.Number.Uint64() - stopMap := uint32((u.tailLvPointer + u.f.valuesPerMap - 1) >> u.f.logValuesPerMap) + stopMap := uint32((u.tailBlockLvPointer + u.f.valuesPerMap - 1) >> u.f.logValuesPerMap) var cnt int if err := iterateReceiptsReverse(receipts, func(lv common.Hash) error { cnt++ @@ -672,11 +672,11 @@ func (u *updateBatch) addBlockToTail(header *types.Header, receipts types.Receip }); err != nil { return err } - startMap := uint32(u.tailLvPointer >> u.f.logValuesPerMap) + startMap := uint32(u.tailBlockLvPointer >> u.f.logValuesPerMap) for m := startMap; m < stopMap; m++ { u.mapBlockPtr[m] = number } - u.blockLvPointer[number] = u.tailLvPointer + u.blockLvPointer[number] = u.tailBlockLvPointer u.tailBlockNumber, u.tailParentHash = number, header.ParentHash return nil } diff --git a/core/filtermaps/indexer_test.go b/core/filtermaps/indexer_test.go index 5fd3f12ce57e..04f2d844137e 100644 --- a/core/filtermaps/indexer_test.go +++ b/core/filtermaps/indexer_test.go @@ -40,6 +40,7 @@ func 
TestIndexerSetHistory(t *testing.T) { ts.setHistory(0, false) ts.chain.addBlocks(1000, 5, 2, 4, false) // 50 log values per block ts.runUntilWait() + ts.checkLvRange(50) ts.setHistory(100, false) ts.runUntil(func() bool { l := ts.lastRange.headLvPointer - ts.lastRange.tailLvPointer @@ -47,11 +48,10 @@ func TestIndexerSetHistory(t *testing.T) { }) ts.setHistory(200, false) ts.runUntilWait() + ts.checkLvRange(50) ts.setHistory(0, false) ts.runUntilWait() - if ts.lastRange.headLvPointer-ts.lastRange.tailLvPointer != 50000 { - t.Fatalf("Invalid number of log values in the final state (expected %d, got %d)", 50000, ts.lastRange.headLvPointer-ts.lastRange.tailLvPointer) - } + ts.checkLvRange(50) } func TestIndexerRandomSetHistory(t *testing.T) { @@ -63,12 +63,13 @@ func TestIndexerRandomSetHistory(t *testing.T) { for rand.Intn(20) != 0 && ts.lastEvent != testHookWait { ts.nextEvent() } + if ts.lastEvent == testHookWait { + ts.checkLvRange(50) + } } ts.setHistory(0, false) ts.runUntilWait() - if ts.lastRange.headLvPointer-ts.lastRange.tailLvPointer != 5000 { - t.Fatalf("Invalid number of log values in the final state (expected %d, got %d)", 5000, ts.lastRange.headLvPointer-ts.lastRange.tailLvPointer) - } + ts.checkLvRange(50) } type testSetup struct { @@ -76,6 +77,7 @@ type testSetup struct { fm *FilterMaps db ethdb.Database chain *testChain + params Params eventCh chan int resumeCh chan struct{} lastEvent int @@ -83,10 +85,13 @@ type testSetup struct { } func newTestSetup(t *testing.T) *testSetup { + params := testParams + params.deriveFields() return &testSetup{ t: t, chain: newTestChain(), db: rawdb.NewMemoryDatabase(), + params: params, eventCh: make(chan int), resumeCh: make(chan struct{}), } @@ -108,11 +113,24 @@ func (ts *testSetup) runUntilWait() { } } +func (ts *testSetup) checkLvRange(lvPerBlock uint64) { + expBlockCount := uint64(len(ts.chain.canonical) - 1) + if ts.fm.history != 0 && ts.fm.history < expBlockCount { + expBlockCount = ts.fm.history + } + if 
ts.lastRange.headLvPointer-ts.lastRange.tailBlockLvPointer != expBlockCount*lvPerBlock { + ts.t.Fatalf("Invalid number of log values (expected %d, got %d)", expBlockCount*lvPerBlock, ts.lastRange.headLvPointer-ts.lastRange.tailLvPointer) + } + if ts.lastRange.tailBlockLvPointer-ts.lastRange.tailLvPointer >= ts.params.valuesPerMap { + ts.t.Fatalf("Invalid number of leftover tail log values (expected < %d, got %d)", ts.params.valuesPerMap, ts.lastRange.tailBlockLvPointer-ts.lastRange.tailLvPointer) + } +} + func (ts *testSetup) setHistory(history uint64, noHistory bool) { if ts.fm != nil { ts.stopFm() } - ts.fm = NewFilterMaps(ts.db, ts.chain, testParams, history, noHistory) + ts.fm = NewFilterMaps(ts.db, ts.chain, ts.params, history, noHistory) ts.fm.testHook = ts.testHook ts.fm.Start() ts.lastEvent = <-ts.eventCh From 94c869e2362acae6acb48f09dbbb585b05852ddb Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Mon, 30 Sep 2024 02:58:08 +0200 Subject: [PATCH 12/23] core/filtermaps: fixed map pruning --- core/filtermaps/indexer.go | 36 +++++++++++++++++++++++---------- core/filtermaps/indexer_test.go | 13 ------------ 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go index e41222ebcdde..efaa900417e1 100644 --- a/core/filtermaps/indexer.go +++ b/core/filtermaps/indexer.go @@ -17,6 +17,17 @@ const ( removedPointer = math.MaxUint64 // used in updateBatch to signal removed items revertPointFrequency = 256 // frequency of revert points in database cachedRevertPoints = 64 // revert points for most recent blocks in memory + + testHookInit = iota + testHookUpdateHeadEpoch + testHookUpdateHead + testHookExtendTailEpoch + testHookExtendTail + testHookPruneTail + testHookPruneTailMaps + testHookRevert + testHookWait + testHookStop ) // updateLoop initializes and updates the log index structure according to the @@ -121,7 +132,7 @@ func (f *FilterMaps) updateLoop() { syncMatcher = nil } // log index head is at 
latest chain head; process tail blocks if possible - f.tryUpdateTail(head, func() bool { + if f.tryUpdateTail(head, func() bool { // return true if tail processing needs to be stopped select { case ev := <-headEventCh: @@ -136,10 +147,9 @@ func (f *FilterMaps) updateLoop() { } // stop if there is a new chain head (always prioritize head updates) return fmr.headBlockHash != head.Hash() - }) - if fmr.headBlockHash == head.Hash() { - // if tail processing exited while there is no new head then no more - // tail blocks can be processed + }) && fmr.headBlockHash == head.Hash() { + // if tail processing reached its final state and there is no new + // head then wait for more events wait() } } @@ -264,7 +274,7 @@ func (f *FilterMaps) tryUpdateHead(newHead *types.Header) bool { // current head block number and the log history settings. // stopFn is called regularly during the process, and if it returns true, the // latest batch is written and the function returns. -func (f *FilterMaps) tryUpdateTail(head *types.Header, stopFn func() bool) { +func (f *FilterMaps) tryUpdateTail(head *types.Header, stopFn func() bool) bool { var tailTarget uint64 if f.history > 0 { if headNum := head.Number.Uint64(); headNum >= f.history { @@ -273,17 +283,19 @@ func (f *FilterMaps) tryUpdateTail(head *types.Header, stopFn func() bool) { } tailNum := f.getRange().tailBlockNumber if tailNum > tailTarget { - f.tryExtendTail(tailTarget, stopFn) + if !f.tryExtendTail(tailTarget, stopFn) { + return false + } } if tailNum < tailTarget { f.pruneTailPtr(tailTarget) - f.tryPruneTailMaps(tailTarget, stopFn) } + return f.tryPruneTailMaps(tailTarget, stopFn) } // tryExtendTail attempts to extend the log index backwards until it indexes the // tail target block or cannot find more block receipts. 
-func (f *FilterMaps) tryExtendTail(tailTarget uint64, stopFn func() bool) { +func (f *FilterMaps) tryExtendTail(tailTarget uint64, stopFn func() bool) bool { fmr := f.getRange() number, parentHash := fmr.tailBlockNumber, fmr.tailParentHash update := f.newUpdateBatch() @@ -318,6 +330,7 @@ func (f *FilterMaps) tryExtendTail(tailTarget uint64, stopFn func() bool) { if f.testHook != nil { f.testHook(testHookExtendTail) } + return number <= tailTarget } // pruneTailPtr updates the tail block number and hash and the corresponding @@ -362,12 +375,12 @@ func (f *FilterMaps) pruneTailPtr(tailTarget uint64) { // tryPruneTailMaps removes unused filter maps and corresponding log index // pointers from the database. This function also updates targetLvPointer. -func (f *FilterMaps) tryPruneTailMaps(tailTarget uint64, stopFn func() bool) { +func (f *FilterMaps) tryPruneTailMaps(tailTarget uint64, stopFn func() bool) bool { fmr := f.getRange() tailMap := uint32(fmr.tailLvPointer >> f.logValuesPerMap) targetMap := uint32(fmr.tailBlockLvPointer >> f.logValuesPerMap) if tailMap >= targetMap { - return + return true } lastEpoch := (targetMap - 1) >> f.logMapsPerEpoch removeLvPtr, err := f.getMapBlockPtr(tailMap) @@ -396,6 +409,7 @@ func (f *FilterMaps) tryPruneTailMaps(tailTarget uint64, stopFn func() bool) { if logged { log.Info("Finished pruning log index tail", "filter maps left", targetMap-tailMap) } + return tailMap >= targetMap } // pruneMaps removes filter maps and corresponding log index pointers in the diff --git a/core/filtermaps/indexer_test.go b/core/filtermaps/indexer_test.go index 04f2d844137e..4c54d0b4773a 100644 --- a/core/filtermaps/indexer_test.go +++ b/core/filtermaps/indexer_test.go @@ -16,19 +16,6 @@ import ( "github.com/ethereum/go-ethereum/params" ) -const ( - testHookInit = iota - testHookUpdateHeadEpoch - testHookUpdateHead - testHookExtendTailEpoch - testHookExtendTail - testHookPruneTail - testHookPruneTailMaps - testHookRevert - testHookWait - testHookStop 
-) - var testParams = Params{ logMapHeight: 2, logMapsPerEpoch: 4, From ee9caeeb14a7a53c10a0151ed5f4ae4b4e3bdfe3 Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Tue, 1 Oct 2024 02:14:00 +0200 Subject: [PATCH 13/23] core/filtermaps: use unindexed search as a fallback --- core/filtermaps/indexer.go | 27 ++-- core/filtermaps/matcher.go | 90 +---------- core/filtermaps/matcher_backend.go | 4 +- eth/filters/filter.go | 232 ++++++++++++++++++++--------- eth/filters/filter_system_test.go | 8 +- 5 files changed, 179 insertions(+), 182 deletions(-) diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go index efaa900417e1..36094ba555fe 100644 --- a/core/filtermaps/indexer.go +++ b/core/filtermaps/indexer.go @@ -57,21 +57,23 @@ func (f *FilterMaps) updateLoop() { head = f.chain.CurrentBlock() stop bool syncMatcher *FilterMapsMatcherBackend + fmr = f.getRange() ) - defer func() { - sub.Unsubscribe() - if syncMatcher != nil { + matcherSync := func() { + if syncMatcher != nil && fmr.headBlockHash == head.Hash() { syncMatcher.synced(head) syncMatcher = nil } + } + + defer func() { + sub.Unsubscribe() + matcherSync() }() wait := func() { - if syncMatcher != nil { - syncMatcher.synced(head) - syncMatcher = nil - } + matcherSync() if stop { return } @@ -98,7 +100,7 @@ func (f *FilterMaps) updateLoop() { return } } - fmr := f.getRange() + fmr = f.getRange() for !stop { if !fmr.initialized { @@ -106,10 +108,6 @@ func (f *FilterMaps) updateLoop() { return } - if syncMatcher != nil { - syncMatcher.synced(head) - syncMatcher = nil - } fmr = f.getRange() if !fmr.initialized { wait() @@ -127,10 +125,7 @@ func (f *FilterMaps) updateLoop() { continue } } - if syncMatcher != nil { - syncMatcher.synced(head) - syncMatcher = nil - } + matcherSync() // log index head is at latest chain head; process tail blocks if possible if f.tryUpdateTail(head, func() bool { // return true if tail processing needs to be stopped diff --git a/core/filtermaps/matcher.go 
b/core/filtermaps/matcher.go index 64c7c5efe6ed..04dfe9751b36 100644 --- a/core/filtermaps/matcher.go +++ b/core/filtermaps/matcher.go @@ -47,99 +47,11 @@ type SyncRange struct { } // GetPotentialMatches returns a list of logs that are potential matches for the -// given filter criteria. If parts of the requested range are not indexed then -// an error is returned. If parts of the requested range are changed during the -// search process then potentially incorrect logs are discarded and searched -// again, ensuring that the returned results are always consistent with the latest -// state of the chain. -// If firstBlock or lastBlock are bigger than the head block number then they are -// substituted with the latest head of the chain, ensuring that a search until -// the head block is still consistent with the latest canonical chain if a new -// head has been added during the process. -// Note that the returned list may still contain false positives. -func GetPotentialMatches(ctx context.Context, backend MatcherBackend, firstBlock, lastBlock uint64, addresses []common.Address, topics [][]common.Hash) ([]*types.Log, *types.Header, uint64, uint64, error) { - if firstBlock > lastBlock { - return nil, nil, 0, 0, errors.New("invalid search range") - } - // enforce a consistent state before starting the search in order to be able - // to determine valid range later - syncRange, err := backend.SyncLogIndex(ctx) - if err != nil { - return nil, nil, 0, 0, err - } - headBlock := syncRange.Head.Number.Uint64() // Head is guaranteed != nil - // if haveMatches == true then matches correspond to the block number range - // between matchFirst and matchLast - var ( - matches []*types.Log - haveMatches bool - matchFirst, matchLast uint64 - ) - for !haveMatches || (matchLast < lastBlock && matchLast < headBlock) { - // determine range to be searched; for simplicity we only extend the most - // recent end of the existing match set by matching between searchFirst - // and searchLast. 
- searchFirst, searchLast := firstBlock, lastBlock - if searchFirst > headBlock { - searchFirst = headBlock - } - if searchLast > headBlock { - searchLast = headBlock - } - if haveMatches && matchFirst != searchFirst { - // searchFirst might change if firstBlock > headBlock - matches, haveMatches = nil, false - } - if haveMatches && matchLast >= searchFirst { - searchFirst = matchLast + 1 - } - // check if indexed range covers the requested range - if !syncRange.Indexed || syncRange.FirstIndexed > searchFirst || syncRange.LastIndexed < searchLast { - return nil, nil, 0, 0, errors.New("log index not available for requested range") - } - // search for matches in the required range - newMatches, err := getPotentialMatches(ctx, backend, searchFirst, searchLast, addresses, topics) - if err != nil { - return nil, nil, 0, 0, err - } - // enforce a consistent state again in order to determine the guaranteed - // valid range in which the log index has not been changed since the last - // sync. - syncRange, err = backend.SyncLogIndex(ctx) - if err != nil { - return nil, nil, 0, 0, err - } - headBlock = syncRange.Head.Number.Uint64() - // return with error if the beginning of the recently searched range might - // be invalid due to removed log index - if !syncRange.Valid || syncRange.FirstValid > searchFirst || syncRange.LastValid < searchFirst { - return nil, nil, 0, 0, errors.New("log index not available for requested range") - } - // roll back most recent matches if they are not covered by the guaranteed - // valid range - if syncRange.LastValid < searchLast { - for len(newMatches) > 0 && newMatches[len(newMatches)-1].BlockNumber > syncRange.LastValid { - newMatches = newMatches[:len(newMatches)-1] - } - searchLast = syncRange.LastValid - } - // append new matches to existing ones if the were any - if haveMatches { - matches = append(matches, newMatches...) 
- } else { - matches, haveMatches = newMatches, true - } - matchLast = searchLast - } - return matches, syncRange.Head, firstBlock, matchLast, nil -} - -// getPotentialMatches returns a list of logs that are potential matches for the // given filter criteria. If parts of the log index in the searched range are // missing or changed during the search process then the resulting logs belonging // to that block range might be missing or incorrect. // Also note that the returned list may contain false positives. -func getPotentialMatches(ctx context.Context, backend MatcherBackend, firstBlock, lastBlock uint64, addresses []common.Address, topics [][]common.Hash) ([]*types.Log, error) { +func GetPotentialMatches(ctx context.Context, backend MatcherBackend, firstBlock, lastBlock uint64, addresses []common.Address, topics [][]common.Hash) ([]*types.Log, error) { params := backend.GetParams() // find the log value index range to search firstIndex, err := backend.GetBlockLvPointer(ctx, firstBlock) diff --git a/core/filtermaps/matcher_backend.go b/core/filtermaps/matcher_backend.go index 7becdd809337..82dd7c37ef86 100644 --- a/core/filtermaps/matcher_backend.go +++ b/core/filtermaps/matcher_backend.go @@ -111,8 +111,8 @@ func (fm *FilterMapsMatcherBackend) synced(head *types.Header) { } // SyncLogIndex ensures that the log index is consistent with the current state -// of the chain (note that it may or may not be actually synced up to the head). -// It blocks until this state is achieved. +// of the chain and is synced up to the current head. It blocks until this state +// is achieved or the context is cancelled. 
// If successful, it returns a SyncRange that contains the latest chain head, // the indexed range that is currently consistent with the chain and the valid // range that has not been changed and has been consistent with all states of the diff --git a/eth/filters/filter.go b/eth/filters/filter.go index 2fcf0945ba95..e52ffd928731 100644 --- a/eth/filters/filter.go +++ b/eth/filters/filter.go @@ -22,7 +22,6 @@ import ( "math" "math/big" "slices" - "time" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/filtermaps" @@ -93,117 +92,202 @@ func (f *Filter) Logs(ctx context.Context) ([]*types.Log, error) { return nil, errPendingLogsUnsupported } - resolveSpecial := func(number int64) (int64, error) { + resolveSpecial := func(number int64) (uint64, error) { switch number { - case rpc.LatestBlockNumber.Int64(), rpc.PendingBlockNumber.Int64(): - // we should return head here since we've already captured - // that we need to get the pending logs in the pending boolean above - return math.MaxInt64, nil + case rpc.LatestBlockNumber.Int64(): + // when searching from and/or until the current head, we resolve it + // to MaxUint64 which is translated by rangeLogs to the actual head + // in each iteration, ensuring that the head block will be searched + // even if the chain is updated during search. 
+ return math.MaxUint64, nil case rpc.FinalizedBlockNumber.Int64(): hdr, _ := f.sys.backend.HeaderByNumber(ctx, rpc.FinalizedBlockNumber) if hdr == nil { return 0, errors.New("finalized header not found") } - return hdr.Number.Int64(), nil + return hdr.Number.Uint64(), nil case rpc.SafeBlockNumber.Int64(): hdr, _ := f.sys.backend.HeaderByNumber(ctx, rpc.SafeBlockNumber) if hdr == nil { return 0, errors.New("safe header not found") } - return hdr.Number.Int64(), nil - default: - return number, nil + return hdr.Number.Uint64(), nil + } + if number < 0 { + return 0, errors.New("negative block number") } + return uint64(number), nil } - var err error // range query need to resolve the special begin/end block number - if f.begin, err = resolveSpecial(f.begin); err != nil { + begin, err := resolveSpecial(f.begin) + if err != nil { return nil, err } - if f.end, err = resolveSpecial(f.end); err != nil { + end, err := resolveSpecial(f.end) + if err != nil { return nil, err } + return f.rangeLogs(ctx, begin, end) +} - start := time.Now() - mb := f.sys.backend.NewMatcherBackend() - logs, _, _, _, err := filtermaps.GetPotentialMatches(ctx, mb, uint64(f.begin), uint64(f.end), f.addresses, f.topics) - mb.Close() - if err == filtermaps.ErrMatchAll { - // revert to legacy filter - hdr, _ := f.sys.backend.HeaderByNumber(ctx, rpc.LatestBlockNumber) - if hdr == nil { - return nil, errors.New("latest header not found") - } - headNumber := hdr.Number.Int64() - if f.begin > headNumber { - f.begin = headNumber - } - if f.end > headNumber { - f.end = headNumber - } - logChan, errChan := f.rangeLogsAsync(ctx) - var logs []*types.Log - for { - select { - case log := <-logChan: - logs = append(logs, log) - case err := <-errChan: - return logs, err - } - } +func (f *Filter) rangeLogs(ctx context.Context, firstBlock, lastBlock uint64) ([]*types.Log, error) { + if firstBlock > lastBlock { + return nil, errors.New("invalid search range") } - fmLogs := filterLogs(logs, nil, nil, f.addresses, 
f.topics) - log.Debug("Finished log search", "run time", time.Since(start), "true matches", len(fmLogs), "false positives", len(logs)-len(fmLogs)) - return fmLogs, err -} + mb := f.sys.backend.NewMatcherBackend() + defer mb.Close() -// rangeLogsAsync retrieves block-range logs that match the filter criteria asynchronously, -// it creates and returns two channels: one for delivering log data, and one for reporting errors. -func (f *Filter) rangeLogsAsync(ctx context.Context) (chan *types.Log, chan error) { + // enforce a consistent state before starting the search in order to be able + // to determine valid range later + syncRange, err := mb.SyncLogIndex(ctx) + if err != nil { + return nil, err + } + headBlock := syncRange.Head.Number.Uint64() // Head is guaranteed != nil + // if haveMatches == true then matches correspond to the block number range + // between matchFirst and matchLast var ( - logChan = make(chan *types.Log) - errChan = make(chan error) + matches []*types.Log + haveMatches, forceUnindexed bool + matchFirst, matchLast uint64 ) - - go func() { - defer func() { - close(errChan) - close(logChan) - }() - - if err := f.unindexedLogs(ctx, uint64(f.end), logChan); err != nil { - errChan <- err + trimMatches := func(trimFirst, trimLast uint64) { + if !haveMatches { + return + } + if trimLast < matchFirst || trimFirst > matchLast { + matches, haveMatches = nil, false return } + if trimFirst > matchFirst { + for len(matches) > 0 && matches[0].BlockNumber < trimFirst { + matches = matches[1:] + } + matchFirst = trimFirst + } + if trimLast < matchLast { + for len(matches) > 0 && matches[len(matches)-1].BlockNumber > trimLast { + matches = matches[:len(matches)-1] + } + matchLast = trimLast + } + } + + for { + // determine range to be searched; for simplicity we only extend the most + // recent end of the existing match set by matching between searchFirst + // and searchLast. 
+ searchFirst, searchLast := firstBlock, lastBlock + if searchFirst > headBlock { + searchFirst = headBlock + } + if searchLast > headBlock { + searchLast = headBlock + } + trimMatches(searchFirst, searchLast) + if haveMatches && matchFirst == searchFirst && matchLast == searchLast { + return matches, nil + } + var trimTailIfNotValid uint64 + if haveMatches && matchFirst > searchFirst { + // missing tail section; do unindexed search + tailMatches, err := f.unindexedLogs(ctx, searchFirst, matchFirst-1) + if err != nil { + return matches, err + } + matches = append(tailMatches, matches...) + matchFirst = searchFirst + // unindexed results are not affected by valid tail; do not trim tail + trimTailIfNotValid = math.MaxUint64 + } + // now if we have matches, they start at searchFirst + if haveMatches { + searchFirst = matchLast + 1 + if !syncRange.Indexed || syncRange.FirstIndexed > searchFirst { + forceUnindexed = true + } + } + var newMatches []*types.Log + if !syncRange.Indexed || syncRange.FirstIndexed > searchLast || syncRange.LastIndexed < searchFirst { + forceUnindexed = true + } + if !forceUnindexed { + if syncRange.FirstIndexed > searchFirst { + searchFirst = syncRange.FirstIndexed + } + if syncRange.LastIndexed < searchLast { + searchLast = syncRange.LastIndexed + } + newMatches, err = f.indexedLogs(ctx, mb, searchFirst, searchLast) + // trim tail if it affects the indexed search range + trimTailIfNotValid = searchFirst + if err == filtermaps.ErrMatchAll { + // "match all" filters are not supported by filtermaps; fall back + // to unindexed search which is the most efficient in this case + forceUnindexed = true + } + } + if forceUnindexed { + newMatches, err = f.unindexedLogs(ctx, searchFirst, searchLast) + // unindexed results are not affected by valid tail; do not trim tail + trimTailIfNotValid = math.MaxUint64 + } + if err != nil { + return matches, err + } + if !haveMatches { + matches = newMatches + haveMatches, matchFirst, matchLast = true,
searchFirst, searchLast + } else { + matches = append(matches, newMatches...) + matchLast = searchLast + } - errChan <- nil - }() + syncRange, err = mb.SyncLogIndex(ctx) + if err != nil { + return matches, err + } + if !syncRange.Valid { + matches, haveMatches = nil, false + } else { + if syncRange.FirstValid > trimTailIfNotValid { + trimMatches(syncRange.FirstValid, syncRange.LastValid) + } else { + trimMatches(0, syncRange.LastValid) + } + } + } +} - return logChan, errChan +func (f *Filter) indexedLogs(ctx context.Context, mb filtermaps.MatcherBackend, begin, end uint64) ([]*types.Log, error) { + logs, err := filtermaps.GetPotentialMatches(ctx, mb, begin, end, f.addresses, f.topics) + logs = filterLogs(logs, nil, nil, f.addresses, f.topics) + return logs, err } // unindexedLogs returns the logs matching the filter criteria based on raw block // iteration and bloom matching. -func (f *Filter) unindexedLogs(ctx context.Context, end uint64, logChan chan *types.Log) error { - for ; f.begin <= int64(end); f.begin++ { - header, err := f.sys.backend.HeaderByNumber(ctx, rpc.BlockNumber(f.begin)) +func (f *Filter) unindexedLogs(ctx context.Context, begin, end uint64) ([]*types.Log, error) { + log.Warn("Performing unindexed log search", "begin", begin, "end", end) + var logs []*types.Log + for blockNumber := begin; blockNumber <= end; blockNumber++ { + select { + case <-ctx.Done(): + return logs, ctx.Err() + default: + } + header, err := f.sys.backend.HeaderByNumber(ctx, rpc.BlockNumber(blockNumber)) if header == nil || err != nil { - return err + return logs, err } found, err := f.blockLogs(ctx, header) if err != nil { - return err - } - for _, log := range found { - select { - case logChan <- log: - case <-ctx.Done(): - return ctx.Err() - } + return logs, err } + logs = append(logs, found...) } - return nil + return logs, nil } // blockLogs returns the logs matching the filter criteria within a single block. 
diff --git a/eth/filters/filter_system_test.go b/eth/filters/filter_system_test.go index 49b61bd585a2..54d91fd1add7 100644 --- a/eth/filters/filter_system_test.go +++ b/eth/filters/filter_system_test.go @@ -20,7 +20,6 @@ import ( "context" "errors" "math/big" - "math/rand" "reflect" "runtime" "testing" @@ -29,6 +28,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/consensus/ethash" "github.com/ethereum/go-ethereum/core" + "github.com/ethereum/go-ethereum/core/filtermaps" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" @@ -136,6 +136,12 @@ func (b *testBackend) SubscribeChainEvent(ch chan<- core.ChainEvent) event.Subsc return b.chainFeed.Subscribe(ch) } +func (b *testBackend) NewMatcherBackend() filtermaps.MatcherBackend { + fm := filtermaps.NewFilterMaps(b.db, b, filtermaps.DefaultParams, 0, false) + fm.Start() + return fm.NewMatcherBackend() +} + func (b *testBackend) setPending(block *types.Block, receipts types.Receipts) { b.pendingBlock = block b.pendingReceipts = receipts From d5f2af2bb59d3cbd7eb96159f8625a2b3cd39b6e Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Tue, 1 Oct 2024 05:53:35 +0200 Subject: [PATCH 14/23] eth/filters: fixed tests, added more --- core/filtermaps/filtermaps.go | 18 ++- core/filtermaps/indexer.go | 57 +++++-- core/filtermaps/indexer_test.go | 74 ++++++++- core/filtermaps/matcher_backend.go | 7 + core/rawdb/accessors_indexes_test.go | 2 - core/rawdb/database.go | 4 + eth/filters/filter.go | 160 +++++++++++++------- eth/filters/filter_system_test.go | 36 ++++- eth/filters/filter_test.go | 184 +++++++++++++++++++++-- internal/ethapi/api_test.go | 4 + internal/ethapi/transaction_args_test.go | 3 + 11 files changed, 455 insertions(+), 94 deletions(-) diff --git a/core/filtermaps/filtermaps.go b/core/filtermaps/filtermaps.go index 75ad51ada9c1..03eb341ffdb9 100644 --- a/core/filtermaps/filtermaps.go +++ 
b/core/filtermaps/filtermaps.go @@ -20,7 +20,7 @@ const headCacheSize = 8 // maximum number of recent filter maps cached in memory // blockchain defines functions required by the FilterMaps log indexer. type blockchain interface { CurrentBlock() *types.Header - SubscribeChainHeadEvent(ch chan<- core.ChainHeadEvent) event.Subscription + SubscribeChainEvent(ch chan<- core.ChainEvent) event.Subscription GetHeader(hash common.Hash, number uint64) *types.Header GetCanonicalHash(number uint64) common.Hash GetReceiptsByHash(hash common.Hash) types.Receipts @@ -55,7 +55,8 @@ type FilterMaps struct { lvPointerCache *lru.Cache[uint64, uint64] revertPoints map[uint64]*revertPoint - testHook func(int) + waitIdleCh chan chan bool + testHook func(int) } // filterMap is a full or partial in-memory representation of a filter map where @@ -104,12 +105,13 @@ func NewFilterMaps(db ethdb.KeyValueStore, chain blockchain, params Params, hist } params.deriveFields() fm := &FilterMaps{ - db: db, - chain: chain, - closeCh: make(chan struct{}), - history: history, - noHistory: noHistory, - Params: params, + db: db, + chain: chain, + closeCh: make(chan struct{}), + waitIdleCh: make(chan chan bool), + history: history, + noHistory: noHistory, + Params: params, filterMapsRange: filterMapsRange{ initialized: rs.Initialized, headLvPointer: rs.HeadLvPointer, diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go index 36094ba555fe..b324c75b4fdc 100644 --- a/core/filtermaps/indexer.go +++ b/core/filtermaps/indexer.go @@ -17,7 +17,9 @@ const ( removedPointer = math.MaxUint64 // used in updateBatch to signal removed items revertPointFrequency = 256 // frequency of revert points in database cachedRevertPoints = 64 // revert points for most recent blocks in memory +) +const ( testHookInit = iota testHookUpdateHeadEpoch testHookUpdateHead @@ -52,8 +54,8 @@ func (f *FilterMaps) updateLoop() { } var ( - headEventCh = make(chan core.ChainHeadEvent, 10) - sub = 
f.chain.SubscribeChainHeadEvent(headEventCh) + headEventCh = make(chan core.ChainEvent, 10) + sub = f.chain.SubscribeChainEvent(headEventCh) head = f.chain.CurrentBlock() stop bool syncMatcher *FilterMapsMatcherBackend @@ -61,7 +63,7 @@ func (f *FilterMaps) updateLoop() { ) matcherSync := func() { - if syncMatcher != nil && fmr.headBlockHash == head.Hash() { + if syncMatcher != nil && fmr.initialized && fmr.headBlockHash == head.Hash() { syncMatcher.synced(head) syncMatcher = nil } @@ -79,19 +81,32 @@ func (f *FilterMaps) updateLoop() { } delay := time.Second * 20 if f.testHook != nil { - f.testHook(testHookWait) delay = 0 } - select { - case ev := <-headEventCh: - head = ev.Block.Header() - case syncMatcher = <-f.matcherSyncCh: - head = f.chain.CurrentBlock() - case <-f.closeCh: - stop = true - case <-time.After(delay): - // keep updating log index during syncing - head = f.chain.CurrentBlock() + loop: + for { + select { + case ev := <-headEventCh: + head = ev.Block.Header() + case syncMatcher = <-f.matcherSyncCh: + head = f.chain.CurrentBlock() + case <-f.closeCh: + stop = true + case ch := <-f.waitIdleCh: + head = f.chain.CurrentBlock() + if head.Hash() == f.getRange().headBlockHash { + ch <- true + continue loop + } + ch <- false + case <-time.After(delay): + // keep updating log index during syncing + head = f.chain.CurrentBlock() + if f.testHook != nil { + f.testHook(testHookWait) + } + } + break } } for head == nil { @@ -150,6 +165,18 @@ func (f *FilterMaps) updateLoop() { } } +// WaitIdle blocks until the indexer is in an idle state while synced up to the +// latest chain head. +func (f *FilterMaps) WaitIdle() { + for { + ch := make(chan bool) + f.waitIdleCh <- ch + if <-ch { + return + } + } +} + // getRange returns the current filterMapsRange. 
func (f *FilterMaps) getRange() filterMapsRange { f.lock.RLock() @@ -804,7 +831,7 @@ func (f *FilterMaps) revertTo(rp *revertPoint) error { batch := f.db.NewBatch() afterLastMap := uint32((f.headLvPointer + f.valuesPerMap - 1) >> f.logValuesPerMap) - if rp.mapIndex >= afterLastMap { + if rp.mapIndex > afterLastMap { return errors.New("cannot revert (head map behind revert point)") } lvPointer := uint64(rp.mapIndex) << f.logValuesPerMap diff --git a/core/filtermaps/indexer_test.go b/core/filtermaps/indexer_test.go index 4c54d0b4773a..c49a1aa6bb2c 100644 --- a/core/filtermaps/indexer_test.go +++ b/core/filtermaps/indexer_test.go @@ -1,6 +1,8 @@ package filtermaps import ( + "crypto/sha256" + "fmt" "math/big" "math/rand" "sync" @@ -59,6 +61,25 @@ func TestIndexerRandomSetHistory(t *testing.T) { ts.checkLvRange(50) } +func TestIndexerDbEquality(t *testing.T) { + ts := newTestSetup(t) + ts.setHistory(0, false) + for i := 0; i < 10; i++ { + ts.chain.addBlocks(100, 10, 3, 4, true) + ts.runUntilWait() + } + hash1 := ts.fmDbHash() + fmt.Println(hash1) + ts.setHistory(500, false) + ts.runUntilWait() + hash2 := ts.fmDbHash() + fmt.Println(hash2) + ts.setHistory(0, false) + ts.runUntilWait() + hash3 := ts.fmDbHash() + fmt.Println(hash3) +} + type testSetup struct { t *testing.T fm *FilterMaps @@ -94,9 +115,14 @@ func (ts *testSetup) runUntil(stop func() bool) { } func (ts *testSetup) runUntilWait() { - ts.nextEvent() - for ts.lastEvent != testHookWait { + for { ts.nextEvent() + for ts.lastEvent != testHookWait { + ts.nextEvent() + } + if ts.fm.getRange().headBlockHash == ts.chain.CurrentBlock().Hash() { + return + } } } @@ -146,6 +172,19 @@ func (ts *testSetup) stopFm() { ts.fm.closeWg.Wait() } +func (ts *testSetup) fmDbHash() common.Hash { + hasher := sha256.New() + it := ts.db.NewIterator(nil, nil) + for it.Next() { + hasher.Write(it.Key()) + hasher.Write(it.Value()) + } + it.Release() + var result common.Hash + hasher.Sum(result[:0]) + return result +} + func (ts 
*testSetup) close() { ts.stopFm() ts.db.Close() @@ -178,7 +217,7 @@ func (tc *testChain) CurrentBlock() *types.Header { return tc.blocks[tc.canonical[len(tc.canonical)-1]].Header() } -func (tc *testChain) SubscribeChainHeadEvent(ch chan<- core.ChainHeadEvent) event.Subscription { +func (tc *testChain) SubscribeChainEvent(ch chan<- core.ChainEvent) event.Subscription { return tc.chainHeadFeed.Subscribe(ch) } @@ -281,5 +320,32 @@ func (tc *testChain) addBlocks(count, maxTxPerBlock, maxLogsPerReceipt, maxTopic tc.receipts[hash] = types.Receipts{} } } - tc.chainHeadFeed.Send(core.ChainHeadEvent{Block: tc.blocks[tc.canonical[len(tc.canonical)-1]]}) + tc.chainHeadFeed.Send(core.ChainEvent{Block: tc.blocks[tc.canonical[len(tc.canonical)-1]]}) +} + +func (tc *testChain) setHead(headNum int) { + tc.lock.Lock() + defer tc.lock.Unlock() + + tc.canonical = tc.canonical[:headNum+1] + tc.chainHeadFeed.Send(core.ChainEvent{Block: tc.blocks[tc.canonical[len(tc.canonical)-1]]}) +} + +func (tc *testChain) getCanonicalChain() []common.Hash { + tc.lock.RLock() + defer tc.lock.RUnlock() + + cc := make([]common.Hash, len(tc.canonical)) + copy(cc, tc.canonical) + return cc +} + +// restore an earlier state of the chain +func (tc *testChain) setCanonicalChain(cc []common.Hash) { + tc.lock.Lock() + defer tc.lock.Unlock() + + tc.canonical = make([]common.Hash, len(cc)) + copy(tc.canonical, cc) + tc.chainHeadFeed.Send(core.ChainEvent{Block: tc.blocks[tc.canonical[len(tc.canonical)-1]]}) } diff --git a/core/filtermaps/matcher_backend.go b/core/filtermaps/matcher_backend.go index 82dd7c37ef86..29d076505686 100644 --- a/core/filtermaps/matcher_backend.go +++ b/core/filtermaps/matcher_backend.go @@ -118,6 +118,13 @@ func (fm *FilterMapsMatcherBackend) synced(head *types.Header) { // range that has not been changed and has been consistent with all states of the // chain since the previous SyncLogIndex or the creation of the matcher backend. 
func (fm *FilterMapsMatcherBackend) SyncLogIndex(ctx context.Context) (SyncRange, error) { + if fm.f.noHistory { + head := fm.f.chain.CurrentBlock() + if head == nil { + return SyncRange{}, errors.New("canonical chain head not available") + } + return SyncRange{Head: head}, nil + } // add SyncRange return channel, ensuring that syncCh := make(chan SyncRange, 1) fm.f.lock.Lock() diff --git a/core/rawdb/accessors_indexes_test.go b/core/rawdb/accessors_indexes_test.go index 2ce4330f70bf..1b0e4c36c3f3 100644 --- a/core/rawdb/accessors_indexes_test.go +++ b/core/rawdb/accessors_indexes_test.go @@ -17,7 +17,6 @@ package rawdb import ( - "bytes" "math/big" "testing" @@ -25,7 +24,6 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/internal/blocktest" - "github.com/ethereum/go-ethereum/params" "github.com/ethereum/go-ethereum/rlp" ) diff --git a/core/rawdb/database.go b/core/rawdb/database.go index ab4dfd76b7a4..53defba030a3 100644 --- a/core/rawdb/database.go +++ b/core/rawdb/database.go @@ -471,6 +471,7 @@ func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error { accountSnaps stat storageSnaps stat preimages stat + filterMaps stat beaconHeaders stat cliqueSnaps stat @@ -521,6 +522,8 @@ func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error { codes.Add(size) case bytes.HasPrefix(key, txLookupPrefix) && len(key) == (len(txLookupPrefix)+common.HashLength): txLookups.Add(size) + case bytes.HasPrefix(key, FilterMapsPrefix): + filterMaps.Add(size) case bytes.HasPrefix(key, SnapshotAccountPrefix) && len(key) == (len(SnapshotAccountPrefix)+common.HashLength): accountSnaps.Add(size) case bytes.HasPrefix(key, SnapshotStoragePrefix) && len(key) == (len(SnapshotStoragePrefix)+2*common.HashLength): @@ -595,6 +598,7 @@ func InspectDatabase(db ethdb.Database, keyPrefix, keyStart []byte) error { {"Key-Value store", "Block number->hash", numHashPairings.Size(), 
numHashPairings.Count()}, {"Key-Value store", "Block hash->number", hashNumPairings.Size(), hashNumPairings.Count()}, {"Key-Value store", "Transaction index", txLookups.Size(), txLookups.Count()}, + {"Key-Value store", "Log search index", filterMaps.Size(), filterMaps.Count()}, {"Key-Value store", "Contract codes", codes.Size(), codes.Count()}, {"Key-Value store", "Hash trie nodes", legacyTries.Size(), legacyTries.Count()}, {"Key-Value store", "Path trie state lookups", stateLookups.Size(), stateLookups.Count()}, diff --git a/eth/filters/filter.go b/eth/filters/filter.go index e52ffd928731..de03c965aa71 100644 --- a/eth/filters/filter.go +++ b/eth/filters/filter.go @@ -37,9 +37,10 @@ type Filter struct { addresses []common.Address topics [][]common.Hash - block *common.Hash // Block hash if filtering a single block - begin, end int64 // Range interval if filtering multiple blocks - bbMatchCount uint64 + block *common.Hash // Block hash if filtering a single block + begin, end int64 // Range interval if filtering multiple blocks + + rangeLogsTestHook chan rangeLogsTestEvent } // NewRangeFilter creates a new filter which uses a bloom filter on blocks to @@ -131,10 +132,31 @@ func (f *Filter) Logs(ctx context.Context) ([]*types.Log, error) { return f.rangeLogs(ctx, begin, end) } +const ( + rangeLogsTestSync = iota + rangeLogsTestTrimmed + rangeLogsTestIndexed + rangeLogsTestUnindexed + rangeLogsTestDone +) + +type rangeLogsTestEvent struct { + event int + begin, end uint64 +} + func (f *Filter) rangeLogs(ctx context.Context, firstBlock, lastBlock uint64) ([]*types.Log, error) { + if f.rangeLogsTestHook != nil { + defer func() { + f.rangeLogsTestHook <- rangeLogsTestEvent{rangeLogsTestDone, 0, 0} + close(f.rangeLogsTestHook) + }() + } + if firstBlock > lastBlock { - return nil, errors.New("invalid search range") + return nil, nil } + mb := f.sys.backend.NewMatcherBackend() defer mb.Close() @@ -144,6 +166,21 @@ func (f *Filter) rangeLogs(ctx context.Context, firstBlock, 
lastBlock uint64) ([ if err != nil { return nil, err } + if !syncRange.Indexed { + // fallback to completely unindexed search + headNum := syncRange.Head.Number.Uint64() + if firstBlock > headNum { + firstBlock = headNum + } + if lastBlock > headNum { + lastBlock = headNum + } + if f.rangeLogsTestHook != nil { + f.rangeLogsTestHook <- rangeLogsTestEvent{rangeLogsTestUnindexed, firstBlock, lastBlock} + } + return f.unindexedLogs(ctx, firstBlock, lastBlock) + } + headBlock := syncRange.Head.Number.Uint64() // Head is guaranteed != nil // if haveMatches == true then matches correspond to the block number range // between matchFirst and matchLast @@ -157,7 +194,7 @@ func (f *Filter) rangeLogs(ctx context.Context, firstBlock, lastBlock uint64) ([ return } if trimLast < matchFirst || trimFirst > matchLast { - matches, haveMatches = nil, false + matches, haveMatches, matchFirst, matchLast = nil, false, 0, 0 return } if trimFirst > matchFirst { @@ -192,6 +229,9 @@ func (f *Filter) rangeLogs(ctx context.Context, firstBlock, lastBlock uint64) ([ var trimTailIfNotValid uint64 if haveMatches && matchFirst > searchFirst { // missing tail section; do unindexed search + if f.rangeLogsTestHook != nil { + f.rangeLogsTestHook <- rangeLogsTestEvent{rangeLogsTestUnindexed, searchFirst, matchFirst - 1} + } tailMatches, err := f.unindexedLogs(ctx, searchFirst, matchFirst-1) if err != nil { return matches, err @@ -200,56 +240,67 @@ func (f *Filter) rangeLogs(ctx context.Context, firstBlock, lastBlock uint64) ([ matchFirst = searchFirst // unindexed results are not affected by valid tail; do not trim tail trimTailIfNotValid = math.MaxUint64 - } - // now if we have matches, they start at searchFirst - if haveMatches { - searchFirst = matchLast + 1 - if !syncRange.Indexed || syncRange.FirstIndexed > searchFirst { + } else { + // if we have matches, they start at searchFirst + if haveMatches { + searchFirst = matchLast + 1 + if !syncRange.Indexed || syncRange.FirstIndexed > searchFirst { + 
forceUnindexed = true + } + } + var newMatches []*types.Log + if !syncRange.Indexed || syncRange.FirstIndexed > searchLast || syncRange.LastIndexed < searchFirst { forceUnindexed = true } - } - var newMatches []*types.Log - if !syncRange.Indexed || syncRange.FirstIndexed > searchLast || syncRange.LastIndexed < searchFirst { - forceUnindexed = true - } - if !forceUnindexed { - if syncRange.FirstIndexed > searchFirst { - searchFirst = syncRange.FirstIndexed + if !forceUnindexed { + if syncRange.FirstIndexed > searchFirst { + searchFirst = syncRange.FirstIndexed + } + if syncRange.LastIndexed < searchLast { + searchLast = syncRange.LastIndexed + } + if f.rangeLogsTestHook != nil { + f.rangeLogsTestHook <- rangeLogsTestEvent{rangeLogsTestIndexed, searchFirst, searchLast} + } + newMatches, err = f.indexedLogs(ctx, mb, searchFirst, searchLast) + // trim tail if it affects the indexed search range + trimTailIfNotValid = searchFirst + if err == filtermaps.ErrMatchAll { + // "match all" filters are not supported by filtermaps; fall back + // to unindexed search which is the most efficient in this case + forceUnindexed = true + } } - if syncRange.LastIndexed > searchLast { - searchLast = syncRange.LastIndexed + if forceUnindexed { + if f.rangeLogsTestHook != nil { + f.rangeLogsTestHook <- rangeLogsTestEvent{rangeLogsTestUnindexed, searchFirst, searchLast} + } + newMatches, err = f.unindexedLogs(ctx, searchFirst, searchLast) + // unindexed results are not affected by valid tail; do not trim tail + trimTailIfNotValid = math.MaxUint64 } - newMatches, err = f.indexedLogs(ctx, mb, searchFirst, searchLast) - // trim tail if it affects the indexed search range - trimTailIfNotValid = searchFirst - if err == filtermaps.ErrMatchAll { - // "match all" filters are not supported by filtermaps; fall back - // to unindexed search which is the most efficient in this case - forceUnindexed = true + if err != nil { + return matches, err + } + if !haveMatches { + matches = newMatches + 
haveMatches, matchFirst, matchLast = true, searchFirst, searchLast + } else { + matches = append(matches, newMatches...) + matchLast = searchLast } - } - if forceUnindexed { - newMatches, err = f.unindexedLogs(ctx, searchFirst, searchLast) - // unindexed results are not affected by valid tail; do not trim tail - trimTailIfNotValid = math.MaxUint64 - } - if err != nil { - return matches, err - } - if matches == nil { - matches = newMatches - haveMatches, matchFirst, matchLast = true, searchFirst, searchLast - } else { - matches = append(matches, newMatches...) - matchLast = searchLast } + if f.rangeLogsTestHook != nil { + f.rangeLogsTestHook <- rangeLogsTestEvent{event: rangeLogsTestSync, begin: matchFirst, end: matchLast} + } syncRange, err = mb.SyncLogIndex(ctx) if err != nil { return matches, err } + headBlock = syncRange.Head.Number.Uint64() // Head is guaranteed != nil if !syncRange.Valid { - matches, haveMatches = nil, false + matches, haveMatches, matchFirst, matchLast = nil, false, 0, 0 } else { if syncRange.FirstValid > trimTailIfNotValid { trimMatches(syncRange.FirstValid, syncRange.LastValid) @@ -257,37 +308,42 @@ func (f *Filter) rangeLogs(ctx context.Context, firstBlock, lastBlock uint64) ([ trimMatches(0, syncRange.LastValid) } } + if f.rangeLogsTestHook != nil { + f.rangeLogsTestHook <- rangeLogsTestEvent{event: rangeLogsTestTrimmed, begin: matchFirst, end: matchLast} + } } } func (f *Filter) indexedLogs(ctx context.Context, mb filtermaps.MatcherBackend, begin, end uint64) ([]*types.Log, error) { - logs, err := filtermaps.GetPotentialMatches(ctx, mb, begin, end, f.addresses, f.topics) - logs = filterLogs(logs, nil, nil, f.addresses, f.topics) - return logs, err + potentialMatches, err := filtermaps.GetPotentialMatches(ctx, mb, begin, end, f.addresses, f.topics) + matches := filterLogs(potentialMatches, nil, nil, f.addresses, f.topics) + log.Trace("Performed indexed log search", "begin", begin, "end", end, "true matches", len(matches), "false 
positives", len(potentialMatches)-len(matches)) + return matches, err } // unindexedLogs returns the logs matching the filter criteria based on raw block // iteration and bloom matching. func (f *Filter) unindexedLogs(ctx context.Context, begin, end uint64) ([]*types.Log, error) { log.Warn("Performing unindexed log search", "begin", begin, "end", end) - var logs []*types.Log + var matches []*types.Log for blockNumber := begin; blockNumber <= end; blockNumber++ { select { case <-ctx.Done(): - return logs, ctx.Err() + return matches, ctx.Err() default: } header, err := f.sys.backend.HeaderByNumber(ctx, rpc.BlockNumber(blockNumber)) if header == nil || err != nil { - return logs, err + return matches, err } found, err := f.blockLogs(ctx, header) if err != nil { - return logs, err + return matches, err } - logs = append(logs, found...) + matches = append(matches, found...) } - return logs, nil + log.Trace("Performed unindexed log search", "begin", begin, "end", end, "matches", len(matches)) + return matches, nil } // blockLogs returns the logs matching the filter criteria within a single block. 
diff --git a/eth/filters/filter_system_test.go b/eth/filters/filter_system_test.go index 54d91fd1add7..0cc473d78642 100644 --- a/eth/filters/filter_system_test.go +++ b/eth/filters/filter_system_test.go @@ -40,6 +40,7 @@ import ( type testBackend struct { db ethdb.Database + fm *filtermaps.FilterMaps sections uint64 txFeed event.Feed logsFeed event.Feed @@ -58,10 +59,28 @@ func (b *testBackend) CurrentHeader() *types.Header { return hdr } +func (b *testBackend) CurrentBlock() *types.Header { + return b.CurrentHeader() +} + func (b *testBackend) ChainDb() ethdb.Database { return b.db } +func (b *testBackend) GetCanonicalHash(number uint64) common.Hash { + return rawdb.ReadCanonicalHash(b.db, number) +} + +func (b *testBackend) GetHeader(hash common.Hash, number uint64) *types.Header { + hdr, _ := b.HeaderByHash(context.Background(), hash) + return hdr +} + +func (b *testBackend) GetReceiptsByHash(hash common.Hash) types.Receipts { + r, _ := b.GetReceipts(context.Background(), hash) + return r +} + func (b *testBackend) HeaderByNumber(ctx context.Context, blockNr rpc.BlockNumber) (*types.Header, error) { var ( hash common.Hash @@ -137,9 +156,20 @@ func (b *testBackend) SubscribeChainEvent(ch chan<- core.ChainEvent) event.Subsc } func (b *testBackend) NewMatcherBackend() filtermaps.MatcherBackend { - fm := filtermaps.NewFilterMaps(b.db, b, filtermaps.DefaultParams, 0, false) - fm.Start() - return fm.NewMatcherBackend() + return b.fm.NewMatcherBackend() +} + +func (b *testBackend) startFilterMaps(history uint64, noHistory bool) { + b.fm = filtermaps.NewFilterMaps(b.db, b, filtermaps.DefaultParams, history, noHistory) + b.fm.Start() + if !noHistory { + b.fm.WaitIdle() + } +} + +func (b *testBackend) stopFilterMaps() { + b.fm.Stop() + b.fm = nil } func (b *testBackend) setPending(block *types.Block, receipts types.Receipts) { diff --git a/eth/filters/filter_test.go b/eth/filters/filter_test.go index d8b703fee4e9..2082ba4ea7e3 100644 --- a/eth/filters/filter_test.go +++ 
b/eth/filters/filter_test.go @@ -46,15 +46,27 @@ func makeReceipt(addr common.Address) *types.Receipt { return receipt } -func BenchmarkFilters(b *testing.B) { +func BenchmarkFiltersIndexed(b *testing.B) { + benchmarkFilters(b, 0, false) +} + +func BenchmarkFiltersHalfIndexed(b *testing.B) { + benchmarkFilters(b, 50000, false) +} + +func BenchmarkFiltersUnindexed(b *testing.B) { + benchmarkFilters(b, 0, true) +} + +func benchmarkFilters(b *testing.B, history uint64, noHistory bool) { var ( - db, _ = rawdb.NewLevelDBDatabase(b.TempDir(), 0, 0, "", false) - _, sys = newTestFilterSystem(b, db, Config{}) - key1, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") - addr1 = crypto.PubkeyToAddress(key1.PublicKey) - addr2 = common.BytesToAddress([]byte("jeff")) - addr3 = common.BytesToAddress([]byte("ethereum")) - addr4 = common.BytesToAddress([]byte("random addresses please")) + db, _ = rawdb.NewLevelDBDatabase(b.TempDir(), 0, 0, "", false) + backend, sys = newTestFilterSystem(b, db, Config{}) + key1, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") + addr1 = crypto.PubkeyToAddress(key1.PublicKey) + addr2 = common.BytesToAddress([]byte("jeff")) + addr3 = common.BytesToAddress([]byte("ethereum")) + addr4 = common.BytesToAddress([]byte("random addresses please")) gspec = &core.Genesis{ Alloc: types.GenesisAlloc{addr1: {Balance: big.NewInt(1000000)}}, @@ -94,9 +106,12 @@ func BenchmarkFilters(b *testing.B) { rawdb.WriteHeadBlockHash(db, block.Hash()) rawdb.WriteReceipts(db, block.Hash(), block.NumberU64(), receipts[i]) } + backend.startFilterMaps(history, noHistory) + defer backend.stopFilterMaps() + b.ResetTimer() - filter := sys.NewRangeFilter(0, -1, []common.Address{addr1, addr2, addr3, addr4}, nil) + filter := sys.NewRangeFilter(0, int64(rpc.LatestBlockNumber), []common.Address{addr1, addr2, addr3, addr4}, nil) for i := 0; i < b.N; i++ { filter.begin = 0 @@ -107,7 +122,19 @@ func 
BenchmarkFilters(b *testing.B) { } } -func TestFilters(t *testing.T) { +func TestFiltersIndexed(t *testing.T) { + testFilters(t, 0, false) +} + +func TestFiltersHalfIndexed(t *testing.T) { + testFilters(t, 500, false) +} + +func TestFiltersUnindexed(t *testing.T) { + testFilters(t, 0, true) +} + +func testFilters(t *testing.T, history uint64, noHistory bool) { var ( db = rawdb.NewMemoryDatabase() backend, sys = newTestFilterSystem(t, db, Config{}) @@ -279,6 +306,9 @@ func TestFilters(t *testing.T) { }) backend.setPending(pchain[0], preceipts[0]) + backend.startFilterMaps(history, noHistory) + defer backend.stopFilterMaps() + for i, tc := range []struct { f *Filter want string @@ -387,3 +417,137 @@ func TestFilters(t *testing.T) { } }) } + +func TestRangeLogs(t *testing.T) { + var ( + db = rawdb.NewMemoryDatabase() + backend, sys = newTestFilterSystem(t, db, Config{}) + gspec = &core.Genesis{ + Config: params.TestChainConfig, + Alloc: types.GenesisAlloc{}, + BaseFee: big.NewInt(params.InitialBaseFee), + } + ) + _, err := gspec.Commit(db, triedb.NewDatabase(db, nil)) + if err != nil { + t.Fatal(err) + } + chain, _ := core.GenerateChain(gspec.Config, gspec.ToBlock(), ethash.NewFaker(), db, 1000, func(i int, gen *core.BlockGen) {}) + var l uint64 + bc, err := core.NewBlockChain(db, nil, gspec, nil, ethash.NewFaker(), vm.Config{}, &l) + if err != nil { + t.Fatal(err) + } + _, err = bc.InsertChain(chain[:600]) + if err != nil { + t.Fatal(err) + } + + backend.startFilterMaps(200, false) + defer backend.stopFilterMaps() + + var ( + testCase, event int + filter *Filter + addresses = []common.Address{common.Address{}} + ) + + newFilter := func(begin, end int64) { + testCase++ + event = 0 + filter = sys.NewRangeFilter(begin, end, addresses, nil) + filter.rangeLogsTestHook = make(chan rangeLogsTestEvent) + go func(filter *Filter) { + filter.Logs(context.Background()) + // ensure that filter will not be blocked if we exit early + for _ = range filter.rangeLogsTestHook { + } + 
}(filter) + } + + expEvent := func(exp rangeLogsTestEvent) { + event++ + ev := <-filter.rangeLogsTestHook + if ev != exp { + t.Fatalf("Test case #%d: wrong test event #%d received (got %v, expected %v)", testCase, event, ev, exp) + } + } + + // test case #1 + newFilter(300, 500) + expEvent(rangeLogsTestEvent{rangeLogsTestIndexed, 401, 500}) + expEvent(rangeLogsTestEvent{rangeLogsTestSync, 401, 500}) + expEvent(rangeLogsTestEvent{rangeLogsTestTrimmed, 401, 500}) + expEvent(rangeLogsTestEvent{rangeLogsTestUnindexed, 300, 400}) + if _, err := bc.InsertChain(chain[600:700]); err != nil { + t.Fatal(err) + } + backend.fm.WaitIdle() + expEvent(rangeLogsTestEvent{rangeLogsTestSync, 300, 500}) + expEvent(rangeLogsTestEvent{rangeLogsTestTrimmed, 300, 500}) // unindexed search is not affected by trimmed tail + expEvent(rangeLogsTestEvent{rangeLogsTestDone, 0, 0}) + + // test case #2 + newFilter(400, int64(rpc.LatestBlockNumber)) + expEvent(rangeLogsTestEvent{rangeLogsTestIndexed, 501, 700}) + if _, err := bc.InsertChain(chain[700:800]); err != nil { + t.Fatal(err) + } + backend.fm.WaitIdle() + expEvent(rangeLogsTestEvent{rangeLogsTestSync, 501, 700}) + expEvent(rangeLogsTestEvent{rangeLogsTestTrimmed, 601, 700}) + expEvent(rangeLogsTestEvent{rangeLogsTestUnindexed, 400, 600}) + expEvent(rangeLogsTestEvent{rangeLogsTestSync, 400, 700}) + expEvent(rangeLogsTestEvent{rangeLogsTestTrimmed, 400, 700}) + expEvent(rangeLogsTestEvent{rangeLogsTestIndexed, 701, 800}) + if err := bc.SetHead(750); err != nil { + t.Fatal(err) + } + backend.fm.WaitIdle() + expEvent(rangeLogsTestEvent{rangeLogsTestSync, 400, 800}) + expEvent(rangeLogsTestEvent{rangeLogsTestTrimmed, 400, 750}) + expEvent(rangeLogsTestEvent{rangeLogsTestDone, 0, 0}) + + // test case #3 + newFilter(int64(rpc.LatestBlockNumber), int64(rpc.LatestBlockNumber)) + expEvent(rangeLogsTestEvent{rangeLogsTestIndexed, 750, 750}) + if err := bc.SetHead(740); err != nil { + t.Fatal(err) + } + backend.fm.WaitIdle() + 
expEvent(rangeLogsTestEvent{rangeLogsTestSync, 750, 750}) + expEvent(rangeLogsTestEvent{rangeLogsTestTrimmed, 0, 0}) + expEvent(rangeLogsTestEvent{rangeLogsTestIndexed, 740, 740}) + if _, err := bc.InsertChain(chain[740:750]); err != nil { + t.Fatal(err) + } + backend.fm.WaitIdle() + expEvent(rangeLogsTestEvent{rangeLogsTestSync, 740, 740}) + // trimmed at the beginning of the next iteration + expEvent(rangeLogsTestEvent{rangeLogsTestTrimmed, 740, 740}) + expEvent(rangeLogsTestEvent{rangeLogsTestIndexed, 750, 750}) + expEvent(rangeLogsTestEvent{rangeLogsTestSync, 750, 750}) + expEvent(rangeLogsTestEvent{rangeLogsTestTrimmed, 750, 750}) + expEvent(rangeLogsTestEvent{rangeLogsTestDone, 0, 0}) + + // test case #4 + newFilter(400, int64(rpc.LatestBlockNumber)) + expEvent(rangeLogsTestEvent{rangeLogsTestIndexed, 551, 750}) + expEvent(rangeLogsTestEvent{rangeLogsTestSync, 551, 750}) + expEvent(rangeLogsTestEvent{rangeLogsTestTrimmed, 551, 750}) + expEvent(rangeLogsTestEvent{rangeLogsTestUnindexed, 400, 550}) + if _, err := bc.InsertChain(chain[750:1000]); err != nil { + t.Fatal(err) + } + backend.fm.WaitIdle() + expEvent(rangeLogsTestEvent{rangeLogsTestSync, 400, 750}) + // indexed range affected by tail pruning so we have to discard the entire + // match set + expEvent(rangeLogsTestEvent{rangeLogsTestTrimmed, 0, 0}) + expEvent(rangeLogsTestEvent{rangeLogsTestIndexed, 801, 1000}) + expEvent(rangeLogsTestEvent{rangeLogsTestSync, 801, 1000}) + expEvent(rangeLogsTestEvent{rangeLogsTestTrimmed, 801, 1000}) + expEvent(rangeLogsTestEvent{rangeLogsTestUnindexed, 400, 800}) + expEvent(rangeLogsTestEvent{rangeLogsTestSync, 400, 1000}) + expEvent(rangeLogsTestEvent{rangeLogsTestTrimmed, 400, 1000}) +} diff --git a/internal/ethapi/api_test.go b/internal/ethapi/api_test.go index 4a36cbdf2d0d..06e0a43bba45 100644 --- a/internal/ethapi/api_test.go +++ b/internal/ethapi/api_test.go @@ -43,6 +43,7 @@ import ( "github.com/ethereum/go-ethereum/consensus/beacon" 
"github.com/ethereum/go-ethereum/consensus/ethash" "github.com/ethereum/go-ethereum/core" + "github.com/ethereum/go-ethereum/core/filtermaps" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/state" "github.com/ethereum/go-ethereum/core/types" @@ -622,6 +623,9 @@ func (b testBackend) SubscribeRemovedLogsEvent(ch chan<- core.RemovedLogsEvent) func (b testBackend) SubscribeLogsEvent(ch chan<- []*types.Log) event.Subscription { panic("implement me") } +func (b testBackend) NewMatcherBackend() filtermaps.MatcherBackend { + panic("implement me") +} func TestEstimateGas(t *testing.T) { t.Parallel() // Initialize test accounts diff --git a/internal/ethapi/transaction_args_test.go b/internal/ethapi/transaction_args_test.go index 500bb8738c4c..cf4a6dad428a 100644 --- a/internal/ethapi/transaction_args_test.go +++ b/internal/ethapi/transaction_args_test.go @@ -30,6 +30,7 @@ import ( "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethereum/go-ethereum/consensus" "github.com/ethereum/go-ethereum/core" + "github.com/ethereum/go-ethereum/core/filtermaps" "github.com/ethereum/go-ethereum/core/state" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/core/vm" @@ -402,3 +403,5 @@ func (b *backendMock) SubscribeRemovedLogsEvent(ch chan<- core.RemovedLogsEvent) } func (b *backendMock) Engine() consensus.Engine { return nil } + +func (b *backendMock) NewMatcherBackend() filtermaps.MatcherBackend { return nil } From 5c17d796c47ffb8f52e7a5b3e593a716823bbf5a Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Thu, 3 Oct 2024 02:58:59 +0200 Subject: [PATCH 15/23] core/filtermaps: added license text --- core/filtermaps/filtermaps.go | 16 ++++++++++++++++ core/filtermaps/indexer.go | 16 ++++++++++++++++ core/filtermaps/indexer_test.go | 16 ++++++++++++++++ core/filtermaps/matcher.go | 16 ++++++++++++++++ core/filtermaps/matcher_backend.go | 16 ++++++++++++++++ core/filtermaps/math.go | 16 ++++++++++++++++ 
core/filtermaps/math_test.go | 16 ++++++++++++++++ 7 files changed, 112 insertions(+) diff --git a/core/filtermaps/filtermaps.go b/core/filtermaps/filtermaps.go index 03eb341ffdb9..489b6e5e6460 100644 --- a/core/filtermaps/filtermaps.go +++ b/core/filtermaps/filtermaps.go @@ -1,3 +1,19 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + package filtermaps import ( diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go index b324c75b4fdc..47edd55da567 100644 --- a/core/filtermaps/indexer.go +++ b/core/filtermaps/indexer.go @@ -1,3 +1,19 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + package filtermaps import ( diff --git a/core/filtermaps/indexer_test.go b/core/filtermaps/indexer_test.go index c49a1aa6bb2c..562fe79273ae 100644 --- a/core/filtermaps/indexer_test.go +++ b/core/filtermaps/indexer_test.go @@ -1,3 +1,19 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + package filtermaps import ( diff --git a/core/filtermaps/matcher.go b/core/filtermaps/matcher.go index 04dfe9751b36..dd088455a991 100644 --- a/core/filtermaps/matcher.go +++ b/core/filtermaps/matcher.go @@ -1,3 +1,19 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + package filtermaps import ( diff --git a/core/filtermaps/matcher_backend.go b/core/filtermaps/matcher_backend.go index 29d076505686..0bc87e1e9355 100644 --- a/core/filtermaps/matcher_backend.go +++ b/core/filtermaps/matcher_backend.go @@ -1,3 +1,19 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + package filtermaps import ( diff --git a/core/filtermaps/math.go b/core/filtermaps/math.go index b0132df9130d..ca6d01c08c31 100644 --- a/core/filtermaps/math.go +++ b/core/filtermaps/math.go @@ -1,3 +1,19 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + package filtermaps import ( diff --git a/core/filtermaps/math_test.go b/core/filtermaps/math_test.go index 5cf76dd34a92..3210c833ac5e 100644 --- a/core/filtermaps/math_test.go +++ b/core/filtermaps/math_test.go @@ -1,3 +1,19 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . 
+ package filtermaps import ( From 28cdf1549133a803002ed6f05e550aa154cc4797 Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Thu, 3 Oct 2024 12:34:07 +0200 Subject: [PATCH 16/23] core/filtermaps: added more tests --- core/filtermaps/filtermaps.go | 1 - core/filtermaps/indexer.go | 63 ++----- core/filtermaps/indexer_test.go | 263 +++++++++++++++++------------- core/filtermaps/matcher_test.go | 86 ++++++++++ core/rawdb/schema.go | 4 +- eth/filters/filter_system_test.go | 4 +- 6 files changed, 253 insertions(+), 168 deletions(-) create mode 100644 core/filtermaps/matcher_test.go diff --git a/core/filtermaps/filtermaps.go b/core/filtermaps/filtermaps.go index 489b6e5e6460..ab154824013e 100644 --- a/core/filtermaps/filtermaps.go +++ b/core/filtermaps/filtermaps.go @@ -72,7 +72,6 @@ type FilterMaps struct { revertPoints map[uint64]*revertPoint waitIdleCh chan chan bool - testHook func(int) } // filterMap is a full or partial in-memory representation of a filter map where diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go index 47edd55da567..c04d8623a42a 100644 --- a/core/filtermaps/indexer.go +++ b/core/filtermaps/indexer.go @@ -35,28 +35,10 @@ const ( cachedRevertPoints = 64 // revert points for most recent blocks in memory ) -const ( - testHookInit = iota - testHookUpdateHeadEpoch - testHookUpdateHead - testHookExtendTailEpoch - testHookExtendTail - testHookPruneTail - testHookPruneTailMaps - testHookRevert - testHookWait - testHookStop -) - // updateLoop initializes and updates the log index structure according to the // canonical chain. 
func (f *FilterMaps) updateLoop() { - defer func() { - f.closeWg.Done() - if f.testHook != nil { - f.testHook(testHookStop) - } - }() + defer f.closeWg.Done() if f.noHistory { f.reset() @@ -95,10 +77,6 @@ func (f *FilterMaps) updateLoop() { if stop { return } - delay := time.Second * 20 - if f.testHook != nil { - delay = 0 - } loop: for { select { @@ -115,12 +93,9 @@ func (f *FilterMaps) updateLoop() { continue loop } ch <- false - case <-time.After(delay): + case <-time.After(time.Second * 20): // keep updating log index during syncing head = f.chain.CurrentBlock() - if f.testHook != nil { - f.testHook(testHookWait) - } } break } @@ -184,6 +159,10 @@ func (f *FilterMaps) updateLoop() { // WaitIdle blocks until the indexer is in an idle state while synced up to the // latest chain head. func (f *FilterMaps) WaitIdle() { + if f.noHistory { + f.closeWg.Wait() + return + } for { ch := make(chan bool) f.waitIdleCh <- ch @@ -219,9 +198,6 @@ func (f *FilterMaps) tryInit(head *types.Header) bool { log.Error("Could not initialize log index", "error", err) } f.applyUpdateBatch(update) - if f.testHook != nil { - f.testHook(testHookInit) - } return true } @@ -295,16 +271,10 @@ func (f *FilterMaps) tryUpdateHead(newHead *types.Header) bool { if update.updatedRangeLength() >= f.mapsPerEpoch { // limit the amount of data updated in a single batch f.applyUpdateBatch(update) - if f.testHook != nil { - f.testHook(testHookUpdateHeadEpoch) - } update = f.newUpdateBatch() } } f.applyUpdateBatch(update) - if f.testHook != nil { - f.testHook(testHookUpdateHead) - } return true } @@ -342,9 +312,6 @@ func (f *FilterMaps) tryExtendTail(tailTarget uint64, stopFn func() bool) bool { if tailEpoch := update.tailEpoch(); tailEpoch < lastTailEpoch { // limit the amount of data updated in a single batch f.applyUpdateBatch(update) - if f.testHook != nil { - f.testHook(testHookExtendTailEpoch) - } update = f.newUpdateBatch() lastTailEpoch = tailEpoch } @@ -365,9 +332,6 @@ func (f *FilterMaps) 
tryExtendTail(tailTarget uint64, stopFn func() bool) bool { number, parentHash = newTail.Number.Uint64(), newTail.ParentHash } f.applyUpdateBatch(update) - if f.testHook != nil { - f.testHook(testHookExtendTail) - } return number <= tailTarget } @@ -406,9 +370,6 @@ func (f *FilterMaps) pruneTailPtr(tailTarget uint64) { fmr.tailBlockNumber, fmr.tailParentHash = tailTarget, tailParentHash fmr.tailBlockLvPointer = targetLvPointer f.setRange(f.db, fmr) - if f.testHook != nil { - f.testHook(testHookPruneTail) - } } // tryPruneTailMaps removes unused filter maps and corresponding log index @@ -461,6 +422,9 @@ func (f *FilterMaps) pruneMaps(first, afterLast uint32, removeLvPtr *uint64) { batch := f.db.NewBatch() for *removeLvPtr < nextBlockNumber { f.deleteBlockLvPointer(batch, *removeLvPtr) + if (*removeLvPtr)%revertPointFrequency == 0 { + rawdb.DeleteRevertPoint(batch, *removeLvPtr) + } (*removeLvPtr)++ } for mapIndex := first; mapIndex < afterLast; mapIndex++ { @@ -481,9 +445,6 @@ func (f *FilterMaps) pruneMaps(first, afterLast uint32, removeLvPtr *uint64) { if err := batch.Write(); err != nil { log.Crit("Could not write update batch", "error", err) } - if f.testHook != nil { - f.testHook(testHookPruneTailMaps) - } } // updateBatch is a memory overlay collecting changes to the index log structure @@ -873,6 +834,9 @@ func (f *FilterMaps) revertTo(rp *revertPoint) error { } for blockNumber := rp.blockNumber + 1; blockNumber <= f.headBlockNumber; blockNumber++ { f.deleteBlockLvPointer(batch, blockNumber) + if blockNumber%revertPointFrequency == 0 { + rawdb.DeleteRevertPoint(batch, blockNumber) + } } newRange := f.filterMapsRange newRange.headLvPointer = lvPointer @@ -882,8 +846,5 @@ func (f *FilterMaps) revertTo(rp *revertPoint) error { if err := batch.Write(); err != nil { log.Crit("Could not write update batch", "error", err) } - if f.testHook != nil { - f.testHook(testHookRevert) - } return nil } diff --git a/core/filtermaps/indexer_test.go 
b/core/filtermaps/indexer_test.go index 562fe79273ae..6a1f25fc1a79 100644 --- a/core/filtermaps/indexer_test.go +++ b/core/filtermaps/indexer_test.go @@ -18,7 +18,6 @@ package filtermaps import ( "crypto/sha256" - "fmt" "math/big" "math/rand" "sync" @@ -40,72 +39,156 @@ var testParams = Params{ logValuesPerMap: 4, } -func TestIndexerSetHistory(t *testing.T) { +func TestIndexerRandomRange(t *testing.T) { ts := newTestSetup(t) - ts.setHistory(0, false) + defer ts.close() + + forks := make([][]common.Hash, 10) ts.chain.addBlocks(1000, 5, 2, 4, false) // 50 log values per block - ts.runUntilWait() - ts.checkLvRange(50) - ts.setHistory(100, false) - ts.runUntil(func() bool { - l := ts.lastRange.headLvPointer - ts.lastRange.tailLvPointer - return l > 44000 && l < 45000 - }) - ts.setHistory(200, false) - ts.runUntilWait() - ts.checkLvRange(50) + for i := range forks { + if i != 0 { + forkBlock := rand.Intn(1000) + ts.chain.setHead(forkBlock) + ts.chain.addBlocks(1000-forkBlock, 5, 2, 4, false) // 50 log values per block + } + forks[i] = ts.chain.getCanonicalChain() + } ts.setHistory(0, false) - ts.runUntilWait() - ts.checkLvRange(50) -} - -func TestIndexerRandomSetHistory(t *testing.T) { - ts := newTestSetup(t) - ts.chain.addBlocks(100, 5, 2, 4, false) // 50 log values per block - for i := 0; i < 3000; i++ { - ts.setHistory(uint64(rand.Intn(1001)), false) - ts.nextEvent() - for rand.Intn(20) != 0 && ts.lastEvent != testHookWait { - ts.nextEvent() + var ( + history int + noHistory bool + fork, head = len(forks) - 1, 1000 + ) + ts.fm.WaitIdle() + for i := 0; i < 200; i++ { + switch rand.Intn(2) { + case 0: + // change history settings + switch rand.Intn(10) { + case 0: + history, noHistory = 0, false + case 1: + history, noHistory = 0, true + default: + history, noHistory = rand.Intn(1000)+1, false + } + ts.setHistory(uint64(history), noHistory) + case 1: + // change head + fork, head = rand.Intn(len(forks)), rand.Intn(1001) + 
ts.chain.setCanonicalChain(forks[fork][:head+1]) + } + ts.fm.WaitIdle() + fmr := ts.fm.getRange() + if noHistory { + if fmr.initialized { + t.Fatalf("filterMapsRange initialized while indexing is disabled") + } + continue } - if ts.lastEvent == testHookWait { - ts.checkLvRange(50) + if !fmr.initialized { + t.Fatalf("filterMapsRange not initialized while indexing is enabled") + } + var ( + tail int + tpHash common.Hash + ) + if history > 0 && history <= head { + tail = head + 1 - history + } + if tail > 0 { + tpHash = forks[fork][tail-1] + } + if fmr.headBlockNumber != uint64(head) || fmr.headBlockHash != forks[fork][head] { + ts.t.Fatalf("Invalid index head (expected #%d %v, got #%d %v)", head, forks[fork][head], fmr.headBlockNumber, fmr.headBlockHash) + } + if fmr.tailBlockNumber != uint64(tail) || fmr.tailParentHash != tpHash { + ts.t.Fatalf("Invalid index head (expected #%d %v, got #%d %v)", tail, tpHash, fmr.tailBlockNumber, fmr.tailParentHash) + } + expLvCount := uint64(head+1-tail) * 50 + if tail == 0 { + expLvCount -= 50 // no logs in genesis block + } + if fmr.headLvPointer-fmr.tailBlockLvPointer != expLvCount { + ts.t.Fatalf("Invalid number of log values (expected %d, got %d)", expLvCount, fmr.headLvPointer-fmr.tailBlockLvPointer) + } + if fmr.tailBlockLvPointer-fmr.tailLvPointer >= ts.params.valuesPerMap { + ts.t.Fatalf("Invalid number of leftover tail log values (expected < %d, got %d)", ts.params.valuesPerMap, fmr.tailBlockLvPointer-fmr.tailLvPointer) } } - ts.setHistory(0, false) - ts.runUntilWait() - ts.checkLvRange(50) } -func TestIndexerDbEquality(t *testing.T) { +func TestIndexerCompareDb(t *testing.T) { ts := newTestSetup(t) + defer ts.close() + ts.setHistory(0, false) - for i := 0; i < 10; i++ { - ts.chain.addBlocks(100, 10, 3, 4, true) - ts.runUntilWait() - } - hash1 := ts.fmDbHash() - fmt.Println(hash1) - ts.setHistory(500, false) - ts.runUntilWait() - hash2 := ts.fmDbHash() - fmt.Println(hash2) + ts.chain.addBlocks(500, 10, 3, 4, true) + 
ts.fm.WaitIdle() + // revert points are stored after block 500 + ts.chain.addBlocks(500, 10, 3, 4, true) + ts.fm.WaitIdle() + chain1 := ts.chain.getCanonicalChain() + ts.storeDbHash("chain 1 [0, 1000]") + + ts.chain.setHead(600) + ts.fm.WaitIdle() + ts.storeDbHash("chain 1/2 [0, 600]") + + ts.chain.addBlocks(600, 10, 3, 4, true) + ts.fm.WaitIdle() + chain2 := ts.chain.getCanonicalChain() + ts.storeDbHash("chain 2 [0, 1200]") + + ts.setHistory(800, false) + ts.fm.WaitIdle() + ts.storeDbHash("chain 2 [401, 1200]") + + ts.chain.setHead(600) + ts.fm.WaitIdle() + ts.checkDbHash("chain 1/2 [0, 600]") + + ts.chain.setCanonicalChain(chain1) + ts.fm.WaitIdle() + ts.storeDbHash("chain 1 [201, 1000]") + ts.setHistory(0, false) - ts.runUntilWait() - hash3 := ts.fmDbHash() - fmt.Println(hash3) + ts.fm.WaitIdle() + ts.checkDbHash("chain 1 [0, 1000]") + + ts.setHistory(0, true) + ts.fm.WaitIdle() + ts.storeDbHash("no index") + + ts.chain.setCanonicalChain(chain2[:501]) + ts.setHistory(0, false) + ts.fm.WaitIdle() + ts.chain.setCanonicalChain(chain2) + ts.fm.WaitIdle() + ts.checkDbHash("chain 2 [0, 1200]") + + ts.chain.setCanonicalChain(chain1) + ts.fm.WaitIdle() + ts.setHistory(800, false) + ts.fm.WaitIdle() + ts.checkDbHash("chain 1 [201, 1000]") + + ts.chain.setCanonicalChain(chain2) + ts.fm.WaitIdle() + ts.checkDbHash("chain 2 [401, 1200]") + + ts.setHistory(0, true) + ts.fm.WaitIdle() + ts.checkDbHash("no index") } type testSetup struct { - t *testing.T - fm *FilterMaps - db ethdb.Database - chain *testChain - params Params - eventCh chan int - resumeCh chan struct{} - lastEvent int - lastRange filterMapsRange + t *testing.T + fm *FilterMaps + db ethdb.Database + chain *testChain + params Params + dbHashes map[string]common.Hash } func newTestSetup(t *testing.T) *testSetup { @@ -116,76 +199,32 @@ func newTestSetup(t *testing.T) *testSetup { chain: newTestChain(), db: rawdb.NewMemoryDatabase(), params: params, - eventCh: make(chan int), - resumeCh: make(chan struct{}), - } -} 
- -func (ts *testSetup) runUntil(stop func() bool) { - for !stop() { - ts.nextEvent() - for ts.lastEvent == testHookWait { - ts.t.Fatalf("Indexer in waiting state before runUntil condition is met") - } - } -} - -func (ts *testSetup) runUntilWait() { - for { - ts.nextEvent() - for ts.lastEvent != testHookWait { - ts.nextEvent() - } - if ts.fm.getRange().headBlockHash == ts.chain.CurrentBlock().Hash() { - return - } - } -} - -func (ts *testSetup) checkLvRange(lvPerBlock uint64) { - expBlockCount := uint64(len(ts.chain.canonical) - 1) - if ts.fm.history != 0 && ts.fm.history < expBlockCount { - expBlockCount = ts.fm.history - } - if ts.lastRange.headLvPointer-ts.lastRange.tailBlockLvPointer != expBlockCount*lvPerBlock { - ts.t.Fatalf("Invalid number of log values (expected %d, got %d)", expBlockCount*lvPerBlock, ts.lastRange.headLvPointer-ts.lastRange.tailLvPointer) - } - if ts.lastRange.tailBlockLvPointer-ts.lastRange.tailLvPointer >= ts.params.valuesPerMap { - ts.t.Fatalf("Invalid number of leftover tail log values (expected < %d, got %d)", ts.params.valuesPerMap, ts.lastRange.tailBlockLvPointer-ts.lastRange.tailLvPointer) + dbHashes: make(map[string]common.Hash), } } func (ts *testSetup) setHistory(history uint64, noHistory bool) { if ts.fm != nil { - ts.stopFm() + ts.fm.Stop() } ts.fm = NewFilterMaps(ts.db, ts.chain, ts.params, history, noHistory) - ts.fm.testHook = ts.testHook ts.fm.Start() - ts.lastEvent = <-ts.eventCh -} - -func (ts *testSetup) testHook(event int) { - ts.eventCh <- event - <-ts.resumeCh } -func (ts *testSetup) nextEvent() { - ts.resumeCh <- struct{}{} - ts.lastEvent = <-ts.eventCh - ts.lastRange = ts.fm.getRange() +func (ts *testSetup) storeDbHash(id string) { + dbHash := ts.fmDbHash() + for otherId, otherHash := range ts.dbHashes { + if otherHash == dbHash { + ts.t.Fatalf("Unexpected equal database hashes `%s` and `%s`", id, otherId) + } + } + ts.dbHashes[id] = dbHash } -func (ts *testSetup) stopFm() { - close(ts.fm.closeCh) - for { - 
ts.nextEvent() - if ts.lastEvent == testHookStop { - break - } +func (ts *testSetup) checkDbHash(id string) { + if ts.fmDbHash() != ts.dbHashes[id] { + ts.t.Fatalf("Database `%s` hash mismatch", id) } - ts.resumeCh <- struct{}{} - ts.fm.closeWg.Wait() } func (ts *testSetup) fmDbHash() common.Hash { @@ -202,7 +241,9 @@ func (ts *testSetup) fmDbHash() common.Hash { } func (ts *testSetup) close() { - ts.stopFm() + if ts.fm != nil { + ts.fm.Stop() + } ts.db.Close() ts.chain.db.Close() } diff --git a/core/filtermaps/matcher_test.go b/core/filtermaps/matcher_test.go new file mode 100644 index 000000000000..21265d5f0eaf --- /dev/null +++ b/core/filtermaps/matcher_test.go @@ -0,0 +1,86 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+ +package filtermaps + +import ( + "context" + "math/rand" + "testing" + + "github.com/ethereum/go-ethereum/common" +) + +func TestMatcher(t *testing.T) { + ts := newTestSetup(t) + defer ts.close() + + ts.chain.addBlocks(1000, 10, 10, 4, true) + ts.setHistory(0, false) + ts.fm.WaitIdle() + + for i := 0; i < 500; i++ { + bhash := ts.chain.canonical[rand.Intn(len(ts.chain.canonical))] + receipts := ts.chain.receipts[bhash] + if len(receipts) == 0 { + continue + } + receipt := receipts[rand.Intn(len(receipts))] + if len(receipt.Logs) == 0 { + continue + } + log := receipt.Logs[rand.Intn(len(receipt.Logs))] + var ok bool + addresses := make([]common.Address, rand.Intn(3)) + for i := range addresses { + rand.Read(addresses[i][:]) + } + if len(addresses) > 0 { + addresses[rand.Intn(len(addresses))] = log.Address + ok = true + } + topics := make([][]common.Hash, rand.Intn(len(log.Topics)+1)) + for j := range topics { + topics[j] = make([]common.Hash, rand.Intn(3)) + for i := range topics[j] { + rand.Read(topics[j][i][:]) + } + if len(topics[j]) > 0 { + topics[j][rand.Intn(len(topics[j]))] = log.Topics[j] + ok = true + } + } + if !ok { + continue // cannot search for match-all pattern + } + mb := ts.fm.NewMatcherBackend() + logs, err := GetPotentialMatches(context.Background(), mb, 0, 1000, addresses, topics) + mb.Close() + if err != nil { + t.Fatalf("Log search error: %v", err) + } + var found bool + for _, l := range logs { + if l == log { + found = true + break + } + } + if !found { + t.Fatalf("Log search did not return expected log (addresses: %v, topics: %v, expected log: %v)", addresses, topics, *log) + } + } +} diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index 0948fa9d9894..332c32a0eedb 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -347,14 +347,14 @@ func IsStorageTrieNode(key []byte) bool { // filterMapRowKey = filterMapRowPrefix + mapRowIndex (uint64 big endian) func filterMapRowKey(mapRowIndex uint64) []byte { key := 
append(filterMapRowPrefix, make([]byte, 8)...) - binary.BigEndian.PutUint64(key[1:], mapRowIndex) + binary.BigEndian.PutUint64(key[len(filterMapRowPrefix):], mapRowIndex) return key } // filterMapBlockPtrKey = filterMapBlockPtrPrefix + mapIndex (uint32 big endian) func filterMapBlockPtrKey(mapIndex uint32) []byte { key := append(filterMapBlockPtrPrefix, make([]byte, 4)...) - binary.BigEndian.PutUint32(key[1:], mapIndex) + binary.BigEndian.PutUint32(key[len(filterMapBlockPtrPrefix):], mapIndex) return key } diff --git a/eth/filters/filter_system_test.go b/eth/filters/filter_system_test.go index 0cc473d78642..6ce190ec8594 100644 --- a/eth/filters/filter_system_test.go +++ b/eth/filters/filter_system_test.go @@ -162,9 +162,7 @@ func (b *testBackend) NewMatcherBackend() filtermaps.MatcherBackend { func (b *testBackend) startFilterMaps(history uint64, noHistory bool) { b.fm = filtermaps.NewFilterMaps(b.db, b, filtermaps.DefaultParams, history, noHistory) b.fm.Start() - if !noHistory { - b.fm.WaitIdle() - } + b.fm.WaitIdle() } func (b *testBackend) stopFilterMaps() { From 37d9fd5432df8d78cf5aa8580dc032fa27a106c0 Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Thu, 3 Oct 2024 17:57:46 +0200 Subject: [PATCH 17/23] core/filtermaps: trigger unindexing after 1000 blocks --- core/filtermaps/filtermaps.go | 20 ++++++++++---------- core/filtermaps/indexer.go | 24 ++++++++++++------------ core/filtermaps/indexer_test.go | 2 +- eth/backend.go | 2 +- eth/filters/filter_system_test.go | 2 +- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/core/filtermaps/filtermaps.go b/core/filtermaps/filtermaps.go index ab154824013e..90b6e9eb9b3c 100644 --- a/core/filtermaps/filtermaps.go +++ b/core/filtermaps/filtermaps.go @@ -49,12 +49,12 @@ type blockchain interface { // without the tree hashing and consensus changes: // https://eips.ethereum.org/EIPS/eip-7745 type FilterMaps struct { - lock sync.RWMutex - db ethdb.KeyValueStore - closeCh chan struct{} - closeWg 
sync.WaitGroup - history uint64 - noHistory bool + lock sync.RWMutex + db ethdb.KeyValueStore + closeCh chan struct{} + closeWg sync.WaitGroup + history, unindexLimit uint64 + noHistory bool Params filterMapsRange @@ -101,9 +101,9 @@ var emptyRow = FilterRow{} // Note that tailBlockLvPointer points to the earliest log value index belonging // to the tail block while tailLvPointer points to the earliest log value index // added to the corresponding filter map. The latter might point to an earlier -// index after tail blocks have been pruned because we do not remove tail values -// one by one, rather delete entire maps when all blocks that had log values in -// those maps are unindexed. +// index after tail blocks have been unindexed because we do not remove tail +// values one by one, rather delete entire maps when all blocks that had log +// values in those maps are unindexed. type filterMapsRange struct { initialized bool headLvPointer, tailLvPointer, tailBlockLvPointer uint64 @@ -113,7 +113,7 @@ type filterMapsRange struct { // NewFilterMaps creates a new FilterMaps and starts the indexer in order to keep // the structure in sync with the given blockchain. 
-func NewFilterMaps(db ethdb.KeyValueStore, chain blockchain, params Params, history uint64, noHistory bool) *FilterMaps { +func NewFilterMaps(db ethdb.KeyValueStore, chain blockchain, params Params, history, unindexLimit uint64, noHistory bool) *FilterMaps { rs, err := rawdb.ReadFilterMapsRange(db) if err != nil { log.Error("Error reading log index range", "error", err) diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go index c04d8623a42a..6a2bae02d4a6 100644 --- a/core/filtermaps/indexer.go +++ b/core/filtermaps/indexer.go @@ -295,10 +295,10 @@ func (f *FilterMaps) tryUpdateTail(head *types.Header, stopFn func() bool) bool return false } } - if tailNum < tailTarget { - f.pruneTailPtr(tailTarget) + if tailNum+f.unindexLimit <= tailTarget { + f.unindexTailPtr(tailTarget) } - return f.tryPruneTailMaps(tailTarget, stopFn) + return f.tryUnindexTailMaps(tailTarget, stopFn) } // tryExtendTail attempts to extend the log index backwards until it indexes the @@ -335,12 +335,12 @@ func (f *FilterMaps) tryExtendTail(tailTarget uint64, stopFn func() bool) bool { return number <= tailTarget } -// pruneTailPtr updates the tail block number and hash and the corresponding +// unindexTailPtr updates the tail block number and hash and the corresponding // tailBlockLvPointer according to the given tail target block number. // Note that this function does not remove old index data, only marks it unused // by updating the tail pointers, except for targetLvPointer which is unchanged // as it marks the tail of the log index data stored in the database. 
-func (f *FilterMaps) pruneTailPtr(tailTarget uint64) { +func (f *FilterMaps) unindexTailPtr(tailTarget uint64) { f.lock.Lock() defer f.lock.Unlock() @@ -372,9 +372,9 @@ func (f *FilterMaps) pruneTailPtr(tailTarget uint64) { f.setRange(f.db, fmr) } -// tryPruneTailMaps removes unused filter maps and corresponding log index +// tryUnindexTailMaps removes unused filter maps and corresponding log index // pointers from the database. This function also updates targetLvPointer. -func (f *FilterMaps) tryPruneTailMaps(tailTarget uint64, stopFn func() bool) bool { +func (f *FilterMaps) tryUnindexTailMaps(tailTarget uint64, stopFn func() bool) bool { fmr := f.getRange() tailMap := uint32(fmr.tailLvPointer >> f.logValuesPerMap) targetMap := uint32(fmr.tailBlockLvPointer >> f.logValuesPerMap) @@ -394,11 +394,11 @@ func (f *FilterMaps) tryPruneTailMaps(tailTarget uint64, stopFn func() bool) boo for tailMap < targetMap && !stopFn() { tailEpoch := tailMap >> f.logMapsPerEpoch if tailEpoch == lastEpoch { - f.pruneMaps(tailMap, targetMap, &removeLvPtr) + f.unindexMaps(tailMap, targetMap, &removeLvPtr) break } nextTailMap := (tailEpoch + 1) << f.logMapsPerEpoch - f.pruneMaps(tailMap, nextTailMap, &removeLvPtr) + f.unindexMaps(tailMap, nextTailMap, &removeLvPtr) tailMap = nextTailMap if !logged || time.Since(lastLogged) >= time.Second*10 { log.Info("Pruning log index tail...", "filter maps left", targetMap-tailMap) @@ -411,9 +411,9 @@ func (f *FilterMaps) tryPruneTailMaps(tailTarget uint64, stopFn func() bool) boo return tailMap >= targetMap } -// pruneMaps removes filter maps and corresponding log index pointers in the +// unindexMaps removes filter maps and corresponding log index pointers in the // specified range in a single batch. 
-func (f *FilterMaps) pruneMaps(first, afterLast uint32, removeLvPtr *uint64) { +func (f *FilterMaps) unindexMaps(first, afterLast uint32, removeLvPtr *uint64) { nextBlockNumber, err := f.getMapBlockPtr(afterLast) if err != nil { log.Error("Error fetching next map block pointer", "map index", afterLast, "error", err) @@ -438,7 +438,7 @@ func (f *FilterMaps) pruneMaps(first, afterLast uint32, removeLvPtr *uint64) { fmr := f.getRange() fmr.tailLvPointer = uint64(afterLast) << f.logValuesPerMap if fmr.tailLvPointer > fmr.tailBlockLvPointer { - log.Error("Cannot prune filter maps beyond tail block log value pointer", "tailLvPointer", fmr.tailLvPointer, "tailBlockLvPointer", fmr.tailBlockLvPointer) + log.Error("Cannot unindex filter maps beyond tail block log value pointer", "tailLvPointer", fmr.tailLvPointer, "tailBlockLvPointer", fmr.tailBlockLvPointer) return } f.setRange(batch, fmr) diff --git a/core/filtermaps/indexer_test.go b/core/filtermaps/indexer_test.go index 6a1f25fc1a79..be65aa5cf80e 100644 --- a/core/filtermaps/indexer_test.go +++ b/core/filtermaps/indexer_test.go @@ -207,7 +207,7 @@ func (ts *testSetup) setHistory(history uint64, noHistory bool) { if ts.fm != nil { ts.fm.Stop() } - ts.fm = NewFilterMaps(ts.db, ts.chain, ts.params, history, noHistory) + ts.fm = NewFilterMaps(ts.db, ts.chain, ts.params, history, 1, noHistory) ts.fm.Start() } diff --git a/eth/backend.go b/eth/backend.go index f1db95803ff1..9630ef5d4412 100644 --- a/eth/backend.go +++ b/eth/backend.go @@ -214,7 +214,7 @@ func New(stack *node.Node, config *ethconfig.Config) (*Ethereum, error) { if err != nil { return nil, err } - eth.filterMaps = filtermaps.NewFilterMaps(chainDb, eth.blockchain, filtermaps.DefaultParams, config.LogHistory, config.LogNoHistory) + eth.filterMaps = filtermaps.NewFilterMaps(chainDb, eth.blockchain, filtermaps.DefaultParams, config.LogHistory, 1000, config.LogNoHistory) if config.BlobPool.Datadir != "" { config.BlobPool.Datadir = 
stack.ResolvePath(config.BlobPool.Datadir) diff --git a/eth/filters/filter_system_test.go b/eth/filters/filter_system_test.go index 6ce190ec8594..9b2bb5c2bcfa 100644 --- a/eth/filters/filter_system_test.go +++ b/eth/filters/filter_system_test.go @@ -160,7 +160,7 @@ func (b *testBackend) NewMatcherBackend() filtermaps.MatcherBackend { } func (b *testBackend) startFilterMaps(history uint64, noHistory bool) { - b.fm = filtermaps.NewFilterMaps(b.db, b, filtermaps.DefaultParams, history, noHistory) + b.fm = filtermaps.NewFilterMaps(b.db, b, filtermaps.DefaultParams, history, 1, noHistory) b.fm.Start() b.fm.WaitIdle() } From 455dd2016ef82c37d111a43b6031f3704d443e3b Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Thu, 3 Oct 2024 23:08:47 +0200 Subject: [PATCH 18/23] core/filtermaps: improved unindexer --- core/filtermaps/indexer.go | 189 +++++++++++++++++++------------------ 1 file changed, 95 insertions(+), 94 deletions(-) diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go index 6a2bae02d4a6..0b1db011e8a9 100644 --- a/core/filtermaps/indexer.go +++ b/core/filtermaps/indexer.go @@ -278,10 +278,11 @@ func (f *FilterMaps) tryUpdateHead(newHead *types.Header) bool { return true } -// tryUpdateTail attempts to extend or prune the log index according to the +// tryUpdateTail attempts to extend or shorten the log index according to the // current head block number and the log history settings. // stopFn is called regularly during the process, and if it returns true, the // latest batch is written and the function returns. +// tryUpdateTail returns true if it has reached the desired history length. 
func (f *FilterMaps) tryUpdateTail(head *types.Header, stopFn func() bool) bool { var tailTarget uint64 if f.history > 0 { @@ -296,13 +297,13 @@ func (f *FilterMaps) tryUpdateTail(head *types.Header, stopFn func() bool) bool } } if tailNum+f.unindexLimit <= tailTarget { - f.unindexTailPtr(tailTarget) + return f.tryUnindexTail(tailTarget, stopFn) } - return f.tryUnindexTailMaps(tailTarget, stopFn) + return true } -// tryExtendTail attempts to extend the log index backwards until it indexes the -// tail target block or cannot find more block receipts. +// tryExtendTail attempts to extend the log index backwards until the desired +// indexed history length is achieved. Returns true if finished. func (f *FilterMaps) tryExtendTail(tailTarget uint64, stopFn func() bool) bool { fmr := f.getRange() number, parentHash := fmr.tailBlockNumber, fmr.tailParentHash @@ -335,116 +336,116 @@ func (f *FilterMaps) tryExtendTail(tailTarget uint64, stopFn func() bool) bool { return number <= tailTarget } -// unindexTailPtr updates the tail block number and hash and the corresponding -// tailBlockLvPointer according to the given tail target block number. -// Note that this function does not remove old index data, only marks it unused -// by updating the tail pointers, except for targetLvPointer which is unchanged -// as it marks the tail of the log index data stored in the database. 
-func (f *FilterMaps) unindexTailPtr(tailTarget uint64) { - f.lock.Lock() - defer f.lock.Unlock() - - // obtain target log value pointer - if tailTarget <= f.tailBlockNumber || tailTarget > f.headBlockNumber { - return // nothing to do - } - targetLvPointer, err := f.getBlockLvPointer(tailTarget) - fmr := f.filterMapsRange - - if err != nil { - log.Error("Error fetching tail target log value pointer", "block number", tailTarget, "error", err) - } - - // obtain tail target's parent hash - var tailParentHash common.Hash - if tailTarget > 0 { - if f.chain.GetCanonicalHash(fmr.headBlockNumber) != fmr.headBlockHash { - return // if a reorg is happening right now then try again later +// tryUnindexTail attempts to prune the log index tail until the desired indexed +// history length is achieved. Returns true if finished. +func (f *FilterMaps) tryUnindexTail(tailTarget uint64, stopFn func() bool) bool { + for { + if f.unindexTailEpoch(tailTarget) { + return true } - tailParentHash = f.chain.GetCanonicalHash(tailTarget - 1) - if f.chain.GetCanonicalHash(fmr.headBlockNumber) != fmr.headBlockHash { - return // check again to make sure that tailParentHash is consistent with the indexed chain + if stopFn() { + return false } } - - fmr.tailBlockNumber, fmr.tailParentHash = tailTarget, tailParentHash - fmr.tailBlockLvPointer = targetLvPointer - f.setRange(f.db, fmr) } -// tryUnindexTailMaps removes unused filter maps and corresponding log index -// pointers from the database. This function also updates targetLvPointer. 
-func (f *FilterMaps) tryUnindexTailMaps(tailTarget uint64, stopFn func() bool) bool { - fmr := f.getRange() - tailMap := uint32(fmr.tailLvPointer >> f.logValuesPerMap) - targetMap := uint32(fmr.tailBlockLvPointer >> f.logValuesPerMap) - if tailMap >= targetMap { - return true - } - lastEpoch := (targetMap - 1) >> f.logMapsPerEpoch - removeLvPtr, err := f.getMapBlockPtr(tailMap) - if err != nil { - log.Error("Error fetching tail map block pointer", "map index", tailMap, "error", err) - removeLvPtr = math.MaxUint64 // do not remove anything - } - var ( - logged bool - lastLogged time.Time - ) - for tailMap < targetMap && !stopFn() { - tailEpoch := tailMap >> f.logMapsPerEpoch - if tailEpoch == lastEpoch { - f.unindexMaps(tailMap, targetMap, &removeLvPtr) - break - } - nextTailMap := (tailEpoch + 1) << f.logMapsPerEpoch - f.unindexMaps(tailMap, nextTailMap, &removeLvPtr) - tailMap = nextTailMap - if !logged || time.Since(lastLogged) >= time.Second*10 { - log.Info("Pruning log index tail...", "filter maps left", targetMap-tailMap) - logged, lastLogged = true, time.Now() - } - } - if logged { - log.Info("Finished pruning log index tail", "filter maps left", targetMap-tailMap) - } - return tailMap >= targetMap -} +// unindexTailEpoch unindexes at most an epoch of tail log index data until the +// desired tail target is reached. +func (f *FilterMaps) unindexTailEpoch(tailTarget uint64) (finished bool) { + f.lock.Lock() + oldRange := f.filterMapsRange + newTailMap, changed := f.unindexTailPtr(tailTarget) + newRange := f.filterMapsRange + f.lock.Unlock() -// unindexMaps removes filter maps and corresponding log index pointers in the -// specified range in a single batch. 
-func (f *FilterMaps) unindexMaps(first, afterLast uint32, removeLvPtr *uint64) { - nextBlockNumber, err := f.getMapBlockPtr(afterLast) - if err != nil { - log.Error("Error fetching next map block pointer", "map index", afterLast, "error", err) - nextBlockNumber = 0 // do not remove anything + if !changed { + return true // nothing more to do } + finished = newRange.tailBlockNumber == tailTarget + + oldTailMap := uint32(oldRange.tailLvPointer >> f.logValuesPerMap) + // remove map data [oldTailMap, newTailMap) and block data + // [oldRange.tailBlockNumber, newRange.tailBlockNumber) batch := f.db.NewBatch() - for *removeLvPtr < nextBlockNumber { - f.deleteBlockLvPointer(batch, *removeLvPtr) - if (*removeLvPtr)%revertPointFrequency == 0 { - rawdb.DeleteRevertPoint(batch, *removeLvPtr) + for blockNumber := oldRange.tailBlockNumber; blockNumber < newRange.tailBlockNumber; blockNumber++ { + f.deleteBlockLvPointer(batch, blockNumber) + if blockNumber%revertPointFrequency == 0 { + rawdb.DeleteRevertPoint(batch, blockNumber) } - (*removeLvPtr)++ } - for mapIndex := first; mapIndex < afterLast; mapIndex++ { + for mapIndex := oldTailMap; mapIndex < newTailMap; mapIndex++ { f.deleteMapBlockPtr(batch, mapIndex) } for rowIndex := uint32(0); rowIndex < f.mapHeight; rowIndex++ { - for mapIndex := first; mapIndex < afterLast; mapIndex++ { + for mapIndex := oldTailMap; mapIndex < newTailMap; mapIndex++ { f.storeFilterMapRow(batch, mapIndex, rowIndex, emptyRow) } } - fmr := f.getRange() - fmr.tailLvPointer = uint64(afterLast) << f.logValuesPerMap - if fmr.tailLvPointer > fmr.tailBlockLvPointer { - log.Error("Cannot unindex filter maps beyond tail block log value pointer", "tailLvPointer", fmr.tailLvPointer, "tailBlockLvPointer", fmr.tailBlockLvPointer) + newRange.tailLvPointer = uint64(newTailMap) << f.logValuesPerMap + if newRange.tailLvPointer > newRange.tailBlockLvPointer { + log.Error("Cannot unindex filter maps beyond tail block log value pointer", "tailLvPointer", 
newRange.tailLvPointer, "tailBlockLvPointer", newRange.tailBlockLvPointer) return } - f.setRange(batch, fmr) + f.lock.Lock() + f.setRange(batch, newRange) if err := batch.Write(); err != nil { log.Crit("Could not write update batch", "error", err) } + f.lock.Unlock() + return +} + +// unindexTailPtr determines the range of tail maps to be removed in the next +// batch and updates the tail block number and hash and the corresponding +// tailBlockLvPointer accordingly. +// Note that this function does not remove old index data, only marks it unused +// by updating the tail pointers, except for targetLvPointer which is not changed +// yet as it marks the tail of the log index data stored in the database and +// therefore should be updated when map data is actually removed. +func (f *FilterMaps) unindexTailPtr(tailTarget uint64) (newTailMap uint32, changed bool) { + // obtain target log value pointer + if tailTarget <= f.tailBlockNumber || tailTarget > f.headBlockNumber { + return 0, false // nothing to do + } + targetLvPointer, err := f.getBlockLvPointer(tailTarget) + if err != nil { + log.Error("Error fetching tail target log value pointer", "block number", tailTarget, "error", err) + return 0, false + } + newRange := f.filterMapsRange + tailMap := uint32(f.tailBlockLvPointer >> f.logValuesPerMap) + nextEpochFirstMap := ((tailMap >> f.logMapsPerEpoch) + 1) << f.logMapsPerEpoch + targetMap := uint32(targetLvPointer >> f.logValuesPerMap) + if targetMap <= nextEpochFirstMap { + // unindexed range is within a single epoch, do it in a single batch + newRange.tailBlockNumber, newRange.tailBlockLvPointer, newTailMap = tailTarget, targetLvPointer, targetMap + } else { + // new tail map should be nextEpochFirstMap, determine new tail block + tailBlockNumber, err := f.getMapBlockPtr(nextEpochFirstMap) + if err != nil { + log.Error("Error fetching tail map block pointer", "map index", nextEpochFirstMap, "error", err) + return 0, false + } + tailBlockNumber++ + 
tailBlockLvPointer, err := f.getBlockLvPointer(tailBlockNumber) + if err != nil { + log.Error("Error fetching tail block log value pointer", "block number", tailBlockNumber, "error", err) + return 0, false + } + newRange.tailBlockNumber, newRange.tailBlockLvPointer, newTailMap = tailBlockNumber, tailBlockLvPointer, uint32(tailBlockLvPointer>>f.logValuesPerMap) + } + // obtain tail target's parent hash + if newRange.tailBlockNumber > 0 { + if f.chain.GetCanonicalHash(f.headBlockNumber) != f.headBlockHash { + return 0, false // if a reorg is happening right now then try again later + } + newRange.tailParentHash = f.chain.GetCanonicalHash(newRange.tailBlockNumber - 1) + if f.chain.GetCanonicalHash(f.headBlockNumber) != f.headBlockHash { + return 0, false // check again to make sure that tailParentHash is consistent with the indexed chain + } + } + f.setRange(f.db, newRange) + return newTailMap, true } // updateBatch is a memory overlay collecting changes to the index log structure From 2d8fc05670d8dbbd2f96eac84c5d2a458798d1c1 Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Fri, 4 Oct 2024 00:35:45 +0200 Subject: [PATCH 19/23] core/filtermaps: nice log info during indexing/unindexing --- core/filtermaps/filtermaps.go | 39 +++++++++----- core/filtermaps/indexer.go | 85 ++++++++++++++++++++++++++++-- core/filtermaps/matcher_backend.go | 1 + 3 files changed, 106 insertions(+), 19 deletions(-) diff --git a/core/filtermaps/filtermaps.go b/core/filtermaps/filtermaps.go index 90b6e9eb9b3c..ed6a00c96555 100644 --- a/core/filtermaps/filtermaps.go +++ b/core/filtermaps/filtermaps.go @@ -49,18 +49,22 @@ type blockchain interface { // without the tree hashing and consensus changes: // https://eips.ethereum.org/EIPS/eip-7745 type FilterMaps struct { - lock sync.RWMutex - db ethdb.KeyValueStore closeCh chan struct{} closeWg sync.WaitGroup history, unindexLimit uint64 noHistory bool - Params - filterMapsRange chain blockchain matcherSyncCh chan *FilterMapsMatcherBackend - 
matchers map[*FilterMapsMatcherBackend]struct{} + + // db and range are only modified by indexer under write lock; indexer can + // read them without a lock while matchers can access them under read lock + lock sync.RWMutex + db ethdb.KeyValueStore + filterMapsRange + + matchers map[*FilterMapsMatcherBackend]struct{} + // filterMapCache caches certain filter maps (headCacheSize most recent maps // and one tail map) that are expected to be frequently accessed and modified // while updating the structure. Note that the set of cached maps depends @@ -71,6 +75,11 @@ type FilterMaps struct { lvPointerCache *lru.Cache[uint64, uint64] revertPoints map[uint64]*revertPoint + startHeadUpdate, loggedHeadUpdate, loggedTailExtend, loggedTailUnindex bool + startedHeadUpdate, startedTailExtend, startedTailUnindex time.Time + lastLogHeadUpdate, lastLogTailExtend, lastLogTailUnindex time.Time + ptrHeadUpdate, ptrTailExtend, ptrTailUnindex uint64 + waitIdleCh chan chan bool } @@ -120,13 +129,14 @@ func NewFilterMaps(db ethdb.KeyValueStore, chain blockchain, params Params, hist } params.deriveFields() fm := &FilterMaps{ - db: db, - chain: chain, - closeCh: make(chan struct{}), - waitIdleCh: make(chan chan bool), - history: history, - noHistory: noHistory, - Params: params, + db: db, + chain: chain, + closeCh: make(chan struct{}), + waitIdleCh: make(chan chan bool), + history: history, + noHistory: noHistory, + unindexLimit: unindexLimit, + Params: params, filterMapsRange: filterMapsRange{ initialized: rs.Initialized, headLvPointer: rs.HeadLvPointer, @@ -151,13 +161,14 @@ func NewFilterMaps(db ethdb.KeyValueStore, chain blockchain, params Params, hist return fm } +// Start starts the indexer. func (f *FilterMaps) Start() { f.closeWg.Add(2) go f.removeBloomBits() go f.updateLoop() } -// Close ensures that the indexer is fully stopped before returning. +// Stop ensures that the indexer is fully stopped before returning. 
func (f *FilterMaps) Stop() { close(f.closeCh) f.closeWg.Wait() @@ -172,10 +183,10 @@ func (f *FilterMaps) reset() bool { f.revertPoints = make(map[uint64]*revertPoint) f.blockPtrCache.Purge() f.lvPointerCache.Purge() - f.lock.Unlock() // deleting the range first ensures that resetDb will be called again at next // startup and any leftover data will be removed even if it cannot finish now. rawdb.DeleteFilterMapsRange(f.db) + f.lock.Unlock() return f.removeDbWithPrefix(rawdb.FilterMapsPrefix, "Resetting log index database") } diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go index 0b1db011e8a9..1703c858fef5 100644 --- a/core/filtermaps/indexer.go +++ b/core/filtermaps/indexer.go @@ -29,10 +29,11 @@ import ( ) const ( - startLvMap = 1 << 31 // map index assigned to init block - removedPointer = math.MaxUint64 // used in updateBatch to signal removed items - revertPointFrequency = 256 // frequency of revert points in database - cachedRevertPoints = 64 // revert points for most recent blocks in memory + startLvMap = 1 << 31 // map index assigned to init block + removedPointer = math.MaxUint64 // used in updateBatch to signal removed items + revertPointFrequency = 256 // frequency of revert points in database + cachedRevertPoints = 64 // revert points for most recent blocks in memory + logFrequency = time.Second * 8 // log info frequency during long indexing/unindexing process ) // updateLoop initializes and updates the log index structure according to the @@ -44,7 +45,10 @@ func (f *FilterMaps) updateLoop() { f.reset() return } + + f.lock.Lock() f.updateMapCache() + f.lock.Unlock() if rp, err := f.newUpdateBatch().makeRevertPoint(); err == nil { f.revertPoints[rp.blockNumber] = rp } else { @@ -198,6 +202,7 @@ func (f *FilterMaps) tryInit(head *types.Header) bool { log.Error("Could not initialize log index", "error", err) } f.applyUpdateBatch(update) + log.Info("Initialized log index", "head", head.Number.Uint64()) return true } @@ -209,6 +214,32 @@ 
func (f *FilterMaps) tryInit(head *types.Header) bool { // indexer should exit and remaining parts of the old database will be removed // at next startup. func (f *FilterMaps) tryUpdateHead(newHead *types.Header) bool { + defer func() { + fmr := f.getRange() + if newHead.Hash() == fmr.headBlockHash { + if f.loggedHeadUpdate { + log.Info("Forward log indexing finished", "processed", fmr.headBlockNumber-f.ptrHeadUpdate, + "elapsed", common.PrettyDuration(time.Since(f.lastLogHeadUpdate))) + f.loggedHeadUpdate, f.startHeadUpdate = false, false + } + } else { + if time.Since(f.lastLogHeadUpdate) > logFrequency || !f.loggedHeadUpdate { + log.Info("Forward log indexing in progress", "processed", fmr.headBlockNumber-f.ptrHeadUpdate, + "remaining", newHead.Number.Uint64()-fmr.headBlockNumber, + "elapsed", common.PrettyDuration(time.Since(f.startedHeadUpdate))) + f.loggedHeadUpdate = true + f.lastLogHeadUpdate = time.Now() + } + } + + }() + + if !f.startHeadUpdate { + f.lastLogHeadUpdate = time.Now() + f.startedHeadUpdate = f.lastLogHeadUpdate + f.startHeadUpdate = true + f.ptrHeadUpdate = f.getRange().headBlockNumber + } // iterate back from new head until the log index head or a revert point and // collect headers of blocks to be added var ( @@ -305,14 +336,41 @@ func (f *FilterMaps) tryUpdateTail(head *types.Header, stopFn func() bool) bool // tryExtendTail attempts to extend the log index backwards until the desired // indexed history length is achieved. Returns true if finished. 
func (f *FilterMaps) tryExtendTail(tailTarget uint64, stopFn func() bool) bool { + defer func() { + fmr := f.getRange() + if fmr.tailBlockNumber <= tailTarget { + if f.loggedTailExtend { + log.Info("Reverse log indexing finished", "history", fmr.headBlockNumber+1-fmr.tailBlockNumber, + "processed", f.ptrTailExtend-fmr.tailBlockNumber, "elapsed", common.PrettyDuration(time.Since(f.lastLogTailExtend))) + f.loggedTailExtend = false + } + } + }() + fmr := f.getRange() number, parentHash := fmr.tailBlockNumber, fmr.tailParentHash + + if !f.loggedTailExtend { + f.lastLogTailExtend = time.Now() + f.startedTailExtend = f.lastLogTailExtend + f.ptrTailExtend = fmr.tailBlockNumber + } + update := f.newUpdateBatch() lastTailEpoch := update.tailEpoch() for number > tailTarget && !stopFn() { if tailEpoch := update.tailEpoch(); tailEpoch < lastTailEpoch { // limit the amount of data updated in a single batch f.applyUpdateBatch(update) + + if time.Since(f.lastLogTailExtend) > logFrequency || !f.loggedTailExtend { + log.Info("Reverse log indexing in progress", "history", update.headBlockNumber+1-update.tailBlockNumber, + "processed", f.ptrTailExtend-update.tailBlockNumber, "remaining", update.tailBlockNumber-tailTarget, + "elapsed", common.PrettyDuration(time.Since(f.startedTailExtend))) + f.loggedTailExtend = true + f.lastLogTailExtend = time.Now() + } + update = f.newUpdateBatch() lastTailEpoch = tailEpoch } @@ -339,10 +397,27 @@ func (f *FilterMaps) tryExtendTail(tailTarget uint64, stopFn func() bool) bool { // tryUnindexTail attempts to prune the log index tail until the desired indexed // history length is achieved. Returns true if finished. 
func (f *FilterMaps) tryUnindexTail(tailTarget uint64, stopFn func() bool) bool { + if !f.loggedTailUnindex { + f.lastLogTailUnindex = time.Now() + f.startedTailUnindex = f.lastLogTailUnindex + f.ptrTailUnindex = f.getRange().tailBlockNumber + } for { if f.unindexTailEpoch(tailTarget) { + fmr := f.getRange() + log.Info("Log unindexing finished", "history", fmr.headBlockNumber+1-fmr.tailBlockNumber, + "removed", fmr.tailBlockNumber-f.ptrTailUnindex, "elapsed", common.PrettyDuration(time.Since(f.lastLogTailUnindex))) + f.loggedTailUnindex = false return true } + if time.Since(f.lastLogTailUnindex) > logFrequency || !f.loggedTailUnindex { + fmr := f.getRange() + log.Info("Log unindexing in progress", "history", fmr.headBlockNumber+1-fmr.tailBlockNumber, + "removed", fmr.tailBlockNumber-f.ptrTailUnindex, "remaining", tailTarget-fmr.tailBlockNumber, + "elapsed", common.PrettyDuration(time.Since(f.startedTailUnindex))) + f.loggedTailUnindex = true + f.lastLogTailUnindex = time.Now() + } if stopFn() { return false } @@ -402,6 +477,7 @@ func (f *FilterMaps) unindexTailEpoch(tailTarget uint64) (finished bool) { // by updating the tail pointers, except for targetLvPointer which is not changed // yet as it marks the tail of the log index data stored in the database and // therefore should be updated when map data is actually removed. +// Note that this function assumes that the read/write lock is being held. 
func (f *FilterMaps) unindexTailPtr(tailTarget uint64) (newTailMap uint32, changed bool) { // obtain target log value pointer if tailTarget <= f.tailBlockNumber || tailTarget > f.headBlockNumber { @@ -542,7 +618,6 @@ func (f *FilterMaps) applyUpdateBatch(u *updateBatch) { if err := batch.Write(); err != nil { log.Crit("Could not write update batch", "error", err) } - log.Info("Log index block range updated", "tail", u.tailBlockNumber, "head", u.headBlockNumber, "log values", u.headLvPointer-u.tailBlockLvPointer) } // updatedRangeLength returns the lenght of the updated filter map range. diff --git a/core/filtermaps/matcher_backend.go b/core/filtermaps/matcher_backend.go index 0bc87e1e9355..37a51eac48c7 100644 --- a/core/filtermaps/matcher_backend.go +++ b/core/filtermaps/matcher_backend.go @@ -167,6 +167,7 @@ func (fm *FilterMapsMatcherBackend) SyncLogIndex(ctx context.Context) (SyncRange // valid range with the current indexed range. This function should be called // whenever a part of the log index has been removed, before adding new blocks // to it. +// Note that this function assumes that the read lock is being held. 
func (f *FilterMaps) updateMatchersValidRange() { for fm := range f.matchers { if !f.initialized { From 9dbcb1dbf2f4380d36899a165b9eeb0a4c8a3344 Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Sun, 6 Oct 2024 18:33:22 +0200 Subject: [PATCH 20/23] core/filtermaps: simplified locking scheme --- core/filtermaps/filtermaps.go | 66 +++++++++++++-------- core/filtermaps/indexer.go | 94 ++++++++++++------------------ core/filtermaps/indexer_test.go | 21 ++++--- core/filtermaps/matcher_backend.go | 42 ++++++++----- core/filtermaps/matcher_test.go | 4 +- 5 files changed, 117 insertions(+), 110 deletions(-) diff --git a/core/filtermaps/filtermaps.go b/core/filtermaps/filtermaps.go index ed6a00c96555..908ee250c86f 100644 --- a/core/filtermaps/filtermaps.go +++ b/core/filtermaps/filtermaps.go @@ -57,24 +57,32 @@ type FilterMaps struct { chain blockchain matcherSyncCh chan *FilterMapsMatcherBackend - // db and range are only modified by indexer under write lock; indexer can - // read them without a lock while matchers can access them under read lock - lock sync.RWMutex - db ethdb.KeyValueStore - filterMapsRange - - matchers map[*FilterMapsMatcherBackend]struct{} + db ethdb.KeyValueStore + // fields written by the indexer and read by matcher backend. Indexer can + // read them without a lock and write them under indexLock write lock. + // Matcher backend can read them under indexLock read lock. + indexLock sync.RWMutex + filterMapsRange // filterMapCache caches certain filter maps (headCacheSize most recent maps // and one tail map) that are expected to be frequently accessed and modified // while updating the structure. Note that the set of cached maps depends // only on filterMapsRange and rows of other maps are not cached here. - filterMapLock sync.Mutex filterMapCache map[uint32]filterMap + + // also accessed by indexer and matcher backend but no locking needed. 
blockPtrCache *lru.Cache[uint32, uint64] lvPointerCache *lru.Cache[uint64, uint64] - revertPoints map[uint64]*revertPoint + // the matchers set and the fields of FilterMapsMatcherBackend instances are + // read and written both by exported functions and the indexer. + // Note that if both indexLock and matchersLock needs to be locked then + // indexLock should be locked first. + matchersLock sync.Mutex + matchers map[*FilterMapsMatcherBackend]struct{} + + // fields only accessed by the indexer (no mutex required). + revertPoints map[uint64]*revertPoint startHeadUpdate, loggedHeadUpdate, loggedTailExtend, loggedTailUnindex bool startedHeadUpdate, startedTailExtend, startedTailUnindex time.Time lastLogHeadUpdate, lastLogTailExtend, lastLogTailUnindex time.Time @@ -177,16 +185,16 @@ func (f *FilterMaps) Stop() { // reset un-initializes the FilterMaps structure and removes all related data from // the database. The function returns true if everything was successfully removed. func (f *FilterMaps) reset() bool { - f.lock.Lock() + f.indexLock.Lock() f.filterMapsRange = filterMapsRange{} f.filterMapCache = make(map[uint32]filterMap) f.revertPoints = make(map[uint64]*revertPoint) f.blockPtrCache.Purge() f.lvPointerCache.Purge() + f.indexLock.Unlock() // deleting the range first ensures that resetDb will be called again at next // startup and any leftover data will be removed even if it cannot finish now. rawdb.DeleteFilterMapsRange(f.db) - f.lock.Unlock() return f.removeDbWithPrefix(rawdb.FilterMapsPrefix, "Resetting log index database") } @@ -240,7 +248,7 @@ func (f *FilterMaps) removeDbWithPrefix(prefix []byte, action string) bool { } // setRange updates the covered range and also adds the changes to the given batch. -// Note that this function assumes that the read/write lock is being held. +// Note that this function assumes that the index write lock is being held. 
func (f *FilterMaps) setRange(batch ethdb.KeyValueWriter, newRange filterMapsRange) { f.filterMapsRange = newRange rs := rawdb.FilterMapsRange{ @@ -259,14 +267,11 @@ func (f *FilterMaps) setRange(batch ethdb.KeyValueWriter, newRange filterMapsRan // updateMapCache updates the maps covered by the filterMapCache according to the // covered range. -// Note that this function assumes that the read lock is being held. +// Note that this function assumes that the index write lock is being held. func (f *FilterMaps) updateMapCache() { if !f.initialized { return } - f.filterMapLock.Lock() - defer f.filterMapLock.Unlock() - newFilterMapCache := make(map[uint32]filterMap) firstMap, afterLastMap := uint32(f.tailBlockLvPointer>>f.logValuesPerMap), uint32((f.headLvPointer+f.valuesPerMap-1)>>f.logValuesPerMap) headCacheFirst := firstMap + 1 @@ -294,7 +299,8 @@ func (f *FilterMaps) updateMapCache() { // Note that this function assumes that the log index structure is consistent // with the canonical chain at the point where the given log value index points. // If this is not the case then an invalid result or an error may be returned. -// Note that this function assumes that the read lock is being held. +// Note that this function assumes that the indexer read lock is being held when +// called from outside the updateLoop goroutine. func (f *FilterMaps) getLogByLvIndex(lvIndex uint64) (*types.Log, error) { if lvIndex < f.tailBlockLvPointer || lvIndex > f.headLvPointer { return nil, nil @@ -361,10 +367,9 @@ func (f *FilterMaps) getLogByLvIndex(lvIndex uint64) (*types.Log, error) { // then a non-nil zero length row is returned. // Note that the returned slices should not be modified, they should be copied // on write. +// Note that the function assumes that the indexLock is not being held (should +// only be called from the updateLoop goroutine). 
func (f *FilterMaps) getFilterMapRow(mapIndex, rowIndex uint32) (FilterRow, error) { - f.filterMapLock.Lock() - defer f.filterMapLock.Unlock() - fm := f.filterMapCache[mapIndex] if fm != nil && fm[rowIndex] != nil { return fm[rowIndex], nil @@ -374,19 +379,31 @@ func (f *FilterMaps) getFilterMapRow(mapIndex, rowIndex uint32) (FilterRow, erro return nil, err } if fm != nil { + f.indexLock.Lock() fm[rowIndex] = FilterRow(row) + f.indexLock.Unlock() } return FilterRow(row), nil } +// getFilterMapRowUncached returns the given row of the given map. If the row is +// empty then a non-nil zero length row is returned. +// This function bypasses the memory cache which is mostly useful for processing +// the head and tail maps during the indexing process and should be used by the +// matcher backend which rarely accesses the same row twice and therefore does +// not really benefit from caching anyways. +// The function is unaffected by the indexLock mutex. +func (f *FilterMaps) getFilterMapRowUncached(mapIndex, rowIndex uint32) (FilterRow, error) { + row, err := rawdb.ReadFilterMapRow(f.db, f.mapRowIndex(mapIndex, rowIndex)) + return FilterRow(row), err +} + // storeFilterMapRow stores a row at the given row index of the given map and also // caches it in filterMapCache if the given map is cached. // Note that empty rows are not stored in the database and therefore there is no // separate delete function; deleting a row is the same as storing an empty row. +// Note that this function assumes that the indexer write lock is being held. func (f *FilterMaps) storeFilterMapRow(batch ethdb.Batch, mapIndex, rowIndex uint32, row FilterRow) { - f.filterMapLock.Lock() - defer f.filterMapLock.Unlock() - if fm := f.filterMapCache[mapIndex]; fm != nil { fm[rowIndex] = row } @@ -407,7 +424,8 @@ func (f *FilterMaps) mapRowIndex(mapIndex, rowIndex uint32) uint64 { // getBlockLvPointer returns the starting log value index where the log values // generated by the given block are located. 
If blockNumber is beyond the current // head then the first unoccupied log value index is returned. -// Note that this function assumes that the read lock is being held. +// Note that this function assumes that the indexer read lock is being held when +// called from outside the updateLoop goroutine. func (f *FilterMaps) getBlockLvPointer(blockNumber uint64) (uint64, error) { if blockNumber > f.headBlockNumber { return f.headLvPointer, nil diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go index 1703c858fef5..9bf705cf83e5 100644 --- a/core/filtermaps/indexer.go +++ b/core/filtermaps/indexer.go @@ -46,9 +46,9 @@ func (f *FilterMaps) updateLoop() { return } - f.lock.Lock() + f.indexLock.Lock() f.updateMapCache() - f.lock.Unlock() + f.indexLock.Unlock() if rp, err := f.newUpdateBatch().makeRevertPoint(); err == nil { f.revertPoints[rp.blockNumber] = rp } else { @@ -61,11 +61,10 @@ func (f *FilterMaps) updateLoop() { head = f.chain.CurrentBlock() stop bool syncMatcher *FilterMapsMatcherBackend - fmr = f.getRange() ) matcherSync := func() { - if syncMatcher != nil && fmr.initialized && fmr.headBlockHash == head.Hash() { + if syncMatcher != nil && f.initialized && f.headBlockHash == head.Hash() { syncMatcher.synced(head) syncMatcher = nil } @@ -92,7 +91,7 @@ func (f *FilterMaps) updateLoop() { stop = true case ch := <-f.waitIdleCh: head = f.chain.CurrentBlock() - if head.Hash() == f.getRange().headBlockHash { + if head.Hash() == f.headBlockHash { ch <- true continue loop } @@ -110,27 +109,24 @@ func (f *FilterMaps) updateLoop() { return } } - fmr = f.getRange() for !stop { - if !fmr.initialized { + if !f.initialized { if !f.tryInit(head) { return } - fmr = f.getRange() - if !fmr.initialized { + if !f.initialized { wait() continue } } // log index is initialized - if fmr.headBlockHash != head.Hash() { + if f.headBlockHash != head.Hash() { if !f.tryUpdateHead(head) { return } - fmr = f.getRange() - if fmr.headBlockHash != head.Hash() { + if 
f.headBlockHash != head.Hash() { wait() continue } @@ -151,8 +147,8 @@ func (f *FilterMaps) updateLoop() { head = f.chain.CurrentBlock() } // stop if there is a new chain head (always prioritize head updates) - return fmr.headBlockHash != head.Hash() - }) && fmr.headBlockHash == head.Hash() { + return f.headBlockHash != head.Hash() || syncMatcher != nil + }) && f.headBlockHash == head.Hash() { // if tail processing reached its final state and there is no new // head then wait for more events wait() @@ -176,14 +172,6 @@ func (f *FilterMaps) WaitIdle() { } } -// getRange returns the current filterMapsRange. -func (f *FilterMaps) getRange() filterMapsRange { - f.lock.RLock() - defer f.lock.RUnlock() - - return f.filterMapsRange -} - // tryInit attempts to initialize the log index structure. // Returns false if indexer was stopped during a database reset. In this case the // indexer should exit and remaining parts of the old database will be removed @@ -215,17 +203,16 @@ func (f *FilterMaps) tryInit(head *types.Header) bool { // at next startup. 
func (f *FilterMaps) tryUpdateHead(newHead *types.Header) bool { defer func() { - fmr := f.getRange() - if newHead.Hash() == fmr.headBlockHash { + if newHead.Hash() == f.headBlockHash { if f.loggedHeadUpdate { - log.Info("Forward log indexing finished", "processed", fmr.headBlockNumber-f.ptrHeadUpdate, + log.Info("Forward log indexing finished", "processed", f.headBlockNumber-f.ptrHeadUpdate, "elapsed", common.PrettyDuration(time.Since(f.lastLogHeadUpdate))) f.loggedHeadUpdate, f.startHeadUpdate = false, false } } else { if time.Since(f.lastLogHeadUpdate) > logFrequency || !f.loggedHeadUpdate { - log.Info("Forward log indexing in progress", "processed", fmr.headBlockNumber-f.ptrHeadUpdate, - "remaining", newHead.Number.Uint64()-fmr.headBlockNumber, + log.Info("Forward log indexing in progress", "processed", f.headBlockNumber-f.ptrHeadUpdate, + "remaining", newHead.Number.Uint64()-f.headBlockNumber, "elapsed", common.PrettyDuration(time.Since(f.startedHeadUpdate))) f.loggedHeadUpdate = true f.lastLogHeadUpdate = time.Now() @@ -238,7 +225,7 @@ func (f *FilterMaps) tryUpdateHead(newHead *types.Header) bool { f.lastLogHeadUpdate = time.Now() f.startedHeadUpdate = f.lastLogHeadUpdate f.startHeadUpdate = true - f.ptrHeadUpdate = f.getRange().headBlockNumber + f.ptrHeadUpdate = f.headBlockNumber } // iterate back from new head until the log index head or a revert point and // collect headers of blocks to be added @@ -321,7 +308,7 @@ func (f *FilterMaps) tryUpdateTail(head *types.Header, stopFn func() bool) bool tailTarget = headNum + 1 - f.history } } - tailNum := f.getRange().tailBlockNumber + tailNum := f.tailBlockNumber if tailNum > tailTarget { if !f.tryExtendTail(tailTarget, stopFn) { return false @@ -337,23 +324,21 @@ func (f *FilterMaps) tryUpdateTail(head *types.Header, stopFn func() bool) bool // indexed history length is achieved. Returns true if finished. 
func (f *FilterMaps) tryExtendTail(tailTarget uint64, stopFn func() bool) bool { defer func() { - fmr := f.getRange() - if fmr.tailBlockNumber <= tailTarget { + if f.tailBlockNumber <= tailTarget { if f.loggedTailExtend { - log.Info("Reverse log indexing finished", "history", fmr.headBlockNumber+1-fmr.tailBlockNumber, - "processed", f.ptrTailExtend-fmr.tailBlockNumber, "elapsed", common.PrettyDuration(time.Since(f.lastLogTailExtend))) + log.Info("Reverse log indexing finished", "history", f.headBlockNumber+1-f.tailBlockNumber, + "processed", f.ptrTailExtend-f.tailBlockNumber, "elapsed", common.PrettyDuration(time.Since(f.lastLogTailExtend))) f.loggedTailExtend = false } } }() - fmr := f.getRange() - number, parentHash := fmr.tailBlockNumber, fmr.tailParentHash + number, parentHash := f.tailBlockNumber, f.tailParentHash if !f.loggedTailExtend { f.lastLogTailExtend = time.Now() f.startedTailExtend = f.lastLogTailExtend - f.ptrTailExtend = fmr.tailBlockNumber + f.ptrTailExtend = f.tailBlockNumber } update := f.newUpdateBatch() @@ -400,20 +385,18 @@ func (f *FilterMaps) tryUnindexTail(tailTarget uint64, stopFn func() bool) bool if !f.loggedTailUnindex { f.lastLogTailUnindex = time.Now() f.startedTailUnindex = f.lastLogTailUnindex - f.ptrTailUnindex = f.getRange().tailBlockNumber + f.ptrTailUnindex = f.tailBlockNumber } for { if f.unindexTailEpoch(tailTarget) { - fmr := f.getRange() - log.Info("Log unindexing finished", "history", fmr.headBlockNumber+1-fmr.tailBlockNumber, - "removed", fmr.tailBlockNumber-f.ptrTailUnindex, "elapsed", common.PrettyDuration(time.Since(f.lastLogTailUnindex))) + log.Info("Log unindexing finished", "history", f.headBlockNumber+1-f.tailBlockNumber, + "removed", f.tailBlockNumber-f.ptrTailUnindex, "elapsed", common.PrettyDuration(time.Since(f.lastLogTailUnindex))) f.loggedTailUnindex = false return true } if time.Since(f.lastLogTailUnindex) > logFrequency || !f.loggedTailUnindex { - fmr := f.getRange() - log.Info("Log unindexing in progress", 
"history", fmr.headBlockNumber+1-fmr.tailBlockNumber, - "removed", fmr.tailBlockNumber-f.ptrTailUnindex, "remaining", tailTarget-fmr.tailBlockNumber, + log.Info("Log unindexing in progress", "history", f.headBlockNumber+1-f.tailBlockNumber, + "removed", f.tailBlockNumber-f.ptrTailUnindex, "remaining", tailTarget-f.tailBlockNumber, "elapsed", common.PrettyDuration(time.Since(f.startedTailUnindex))) f.loggedTailUnindex = true f.lastLogTailUnindex = time.Now() @@ -427,11 +410,9 @@ func (f *FilterMaps) tryUnindexTail(tailTarget uint64, stopFn func() bool) bool // unindexTailEpoch unindexes at most an epoch of tail log index data until the // desired tail target is reached. func (f *FilterMaps) unindexTailEpoch(tailTarget uint64) (finished bool) { - f.lock.Lock() oldRange := f.filterMapsRange newTailMap, changed := f.unindexTailPtr(tailTarget) newRange := f.filterMapsRange - f.lock.Unlock() if !changed { return true // nothing more to do @@ -441,6 +422,7 @@ func (f *FilterMaps) unindexTailEpoch(tailTarget uint64) (finished bool) { oldTailMap := uint32(oldRange.tailLvPointer >> f.logValuesPerMap) // remove map data [oldTailMap, newTailMap) and block data // [oldRange.tailBlockNumber, newRange.tailBlockNumber) + f.indexLock.Lock() batch := f.db.NewBatch() for blockNumber := oldRange.tailBlockNumber; blockNumber < newRange.tailBlockNumber; blockNumber++ { f.deleteBlockLvPointer(batch, blockNumber) @@ -459,14 +441,15 @@ func (f *FilterMaps) unindexTailEpoch(tailTarget uint64) (finished bool) { newRange.tailLvPointer = uint64(newTailMap) << f.logValuesPerMap if newRange.tailLvPointer > newRange.tailBlockLvPointer { log.Error("Cannot unindex filter maps beyond tail block log value pointer", "tailLvPointer", newRange.tailLvPointer, "tailBlockLvPointer", newRange.tailBlockLvPointer) + f.indexLock.Unlock() return } - f.lock.Lock() f.setRange(batch, newRange) + f.indexLock.Unlock() + if err := batch.Write(); err != nil { log.Crit("Could not write update batch", "error", err) } - 
f.lock.Unlock() return } @@ -539,9 +522,6 @@ type updateBatch struct { // newUpdateBatch creates a new updateBatch. func (f *FilterMaps) newUpdateBatch() *updateBatch { - f.lock.RLock() - defer f.lock.RUnlock() - return &updateBatch{ f: f, filterMapsRange: f.filterMapsRange, @@ -555,8 +535,7 @@ func (f *FilterMaps) newUpdateBatch() *updateBatch { // applyUpdateBatch writes creates a batch and writes all changes to the database // and also updates the in-memory representations of log index data. func (f *FilterMaps) applyUpdateBatch(u *updateBatch) { - f.lock.Lock() - defer f.lock.Unlock() + f.indexLock.Lock() batch := f.db.NewBatch() // write or remove block to log value index pointers @@ -615,6 +594,8 @@ func (f *FilterMaps) applyUpdateBatch(u *updateBatch) { } // update filterMapsRange f.setRange(batch, u.filterMapsRange) + f.indexLock.Unlock() + if err := batch.Write(); err != nil { log.Crit("Could not write update batch", "error", err) } @@ -849,9 +830,6 @@ func (u *updateBatch) makeRevertPoint() (*revertPoint, error) { // number from memory cache or from the database if available. If no such revert // point is available then it returns no result and no error. func (f *FilterMaps) getRevertPoint(blockNumber uint64) (*revertPoint, error) { - f.lock.RLock() - defer f.lock.RUnlock() - if blockNumber > f.headBlockNumber { blockNumber = f.headBlockNumber } @@ -879,9 +857,6 @@ func (f *FilterMaps) getRevertPoint(blockNumber uint64) (*revertPoint, error) { // revertTo reverts the log index to the given revert point. 
func (f *FilterMaps) revertTo(rp *revertPoint) error { - f.lock.Lock() - defer f.lock.Unlock() - batch := f.db.NewBatch() afterLastMap := uint32((f.headLvPointer + f.valuesPerMap - 1) >> f.logValuesPerMap) if rp.mapIndex > afterLastMap { @@ -918,7 +893,10 @@ func (f *FilterMaps) revertTo(rp *revertPoint) error { newRange.headLvPointer = lvPointer newRange.headBlockNumber = rp.blockNumber newRange.headBlockHash = rp.blockHash + f.indexLock.Lock() f.setRange(batch, newRange) + f.indexLock.Unlock() + if err := batch.Write(); err != nil { log.Crit("Could not write update batch", "error", err) } diff --git a/core/filtermaps/indexer_test.go b/core/filtermaps/indexer_test.go index be65aa5cf80e..a20b062e40dd 100644 --- a/core/filtermaps/indexer_test.go +++ b/core/filtermaps/indexer_test.go @@ -79,14 +79,13 @@ func TestIndexerRandomRange(t *testing.T) { ts.chain.setCanonicalChain(forks[fork][:head+1]) } ts.fm.WaitIdle() - fmr := ts.fm.getRange() if noHistory { - if fmr.initialized { + if ts.fm.initialized { t.Fatalf("filterMapsRange initialized while indexing is disabled") } continue } - if !fmr.initialized { + if !ts.fm.initialized { t.Fatalf("filterMapsRange not initialized while indexing is enabled") } var ( @@ -99,21 +98,21 @@ func TestIndexerRandomRange(t *testing.T) { if tail > 0 { tpHash = forks[fork][tail-1] } - if fmr.headBlockNumber != uint64(head) || fmr.headBlockHash != forks[fork][head] { - ts.t.Fatalf("Invalid index head (expected #%d %v, got #%d %v)", head, forks[fork][head], fmr.headBlockNumber, fmr.headBlockHash) + if ts.fm.headBlockNumber != uint64(head) || ts.fm.headBlockHash != forks[fork][head] { + ts.t.Fatalf("Invalid index head (expected #%d %v, got #%d %v)", head, forks[fork][head], ts.fm.headBlockNumber, ts.fm.headBlockHash) } - if fmr.tailBlockNumber != uint64(tail) || fmr.tailParentHash != tpHash { - ts.t.Fatalf("Invalid index head (expected #%d %v, got #%d %v)", tail, tpHash, fmr.tailBlockNumber, fmr.tailParentHash) + if ts.fm.tailBlockNumber != 
uint64(tail) || ts.fm.tailParentHash != tpHash { + ts.t.Fatalf("Invalid index head (expected #%d %v, got #%d %v)", tail, tpHash, ts.fm.tailBlockNumber, ts.fm.tailParentHash) } expLvCount := uint64(head+1-tail) * 50 if tail == 0 { expLvCount -= 50 // no logs in genesis block } - if fmr.headLvPointer-fmr.tailBlockLvPointer != expLvCount { - ts.t.Fatalf("Invalid number of log values (expected %d, got %d)", expLvCount, fmr.headLvPointer-fmr.tailBlockLvPointer) + if ts.fm.headLvPointer-ts.fm.tailBlockLvPointer != expLvCount { + ts.t.Fatalf("Invalid number of log values (expected %d, got %d)", expLvCount, ts.fm.headLvPointer-ts.fm.tailBlockLvPointer) } - if fmr.tailBlockLvPointer-fmr.tailLvPointer >= ts.params.valuesPerMap { - ts.t.Fatalf("Invalid number of leftover tail log values (expected < %d, got %d)", ts.params.valuesPerMap, fmr.tailBlockLvPointer-fmr.tailLvPointer) + if ts.fm.tailBlockLvPointer-ts.fm.tailLvPointer >= ts.params.valuesPerMap { + ts.t.Fatalf("Invalid number of leftover tail log values (expected < %d, got %d)", ts.params.valuesPerMap, ts.fm.tailBlockLvPointer-ts.fm.tailLvPointer) } } } diff --git a/core/filtermaps/matcher_backend.go b/core/filtermaps/matcher_backend.go index 37a51eac48c7..f5acfa0d3ea1 100644 --- a/core/filtermaps/matcher_backend.go +++ b/core/filtermaps/matcher_backend.go @@ -25,7 +25,8 @@ import ( // FilterMapsMatcherBackend implements MatcherBackend. type FilterMapsMatcherBackend struct { - f *FilterMaps + f *FilterMaps + // these fields should be accessed under f.matchersLock mutex. valid bool firstValid, lastValid uint64 syncCh chan SyncRange @@ -35,8 +36,12 @@ type FilterMapsMatcherBackend struct { // the active matcher set. // Note that Close should always be called when the matcher is no longer used. 
func (f *FilterMaps) NewMatcherBackend() *FilterMapsMatcherBackend { - f.lock.Lock() - defer f.lock.Unlock() + f.indexLock.RLock() + f.matchersLock.Lock() + defer func() { + f.matchersLock.Unlock() + f.indexLock.RUnlock() + }() fm := &FilterMapsMatcherBackend{ f: f, @@ -58,8 +63,8 @@ func (fm *FilterMapsMatcherBackend) GetParams() *Params { // any SyncLogIndex calls are cancelled. // Close implements MatcherBackend. func (fm *FilterMapsMatcherBackend) Close() { - fm.f.lock.Lock() - defer fm.f.lock.Unlock() + fm.f.matchersLock.Lock() + defer fm.f.matchersLock.Unlock() delete(fm.f.matchers, fm) } @@ -70,7 +75,7 @@ func (fm *FilterMapsMatcherBackend) Close() { // on write. // GetFilterMapRow implements MatcherBackend. func (fm *FilterMapsMatcherBackend) GetFilterMapRow(ctx context.Context, mapIndex, rowIndex uint32) (FilterRow, error) { - return fm.f.getFilterMapRow(mapIndex, rowIndex) + return fm.f.getFilterMapRowUncached(mapIndex, rowIndex) } // GetBlockLvPointer returns the starting log value index where the log values @@ -78,8 +83,8 @@ func (fm *FilterMapsMatcherBackend) GetFilterMapRow(ctx context.Context, mapInde // head then the first unoccupied log value index is returned. // GetBlockLvPointer implements MatcherBackend. func (fm *FilterMapsMatcherBackend) GetBlockLvPointer(ctx context.Context, blockNumber uint64) (uint64, error) { - fm.f.lock.RLock() - defer fm.f.lock.RUnlock() + fm.f.indexLock.RLock() + defer fm.f.indexLock.RUnlock() return fm.f.getBlockLvPointer(blockNumber) } @@ -94,8 +99,8 @@ func (fm *FilterMapsMatcherBackend) GetBlockLvPointer(ctx context.Context, block // using SyncLogIndex and re-process certain blocks if necessary. // GetLogByLvIndex implements MatcherBackend. 
func (fm *FilterMapsMatcherBackend) GetLogByLvIndex(ctx context.Context, lvIndex uint64) (*types.Log, error) { - fm.f.lock.RLock() - defer fm.f.lock.RUnlock() + fm.f.indexLock.RLock() + defer fm.f.indexLock.RUnlock() return fm.f.getLogByLvIndex(lvIndex) } @@ -108,8 +113,12 @@ func (fm *FilterMapsMatcherBackend) GetLogByLvIndex(ctx context.Context, lvIndex // should be passed as a parameter and the existing log index should be consistent // with that chain. func (fm *FilterMapsMatcherBackend) synced(head *types.Header) { - fm.f.lock.Lock() - defer fm.f.lock.Unlock() + fm.f.indexLock.RLock() + fm.f.matchersLock.Lock() + defer func() { + fm.f.matchersLock.Unlock() + fm.f.indexLock.RUnlock() + }() fm.syncCh <- SyncRange{ Head: head, @@ -143,9 +152,9 @@ func (fm *FilterMapsMatcherBackend) SyncLogIndex(ctx context.Context) (SyncRange } // add SyncRange return channel, ensuring that syncCh := make(chan SyncRange, 1) - fm.f.lock.Lock() + fm.f.matchersLock.Lock() fm.syncCh = syncCh - fm.f.lock.Unlock() + fm.f.matchersLock.Unlock() select { case fm.f.matcherSyncCh <- fm: @@ -167,8 +176,11 @@ func (fm *FilterMapsMatcherBackend) SyncLogIndex(ctx context.Context) (SyncRange // valid range with the current indexed range. This function should be called // whenever a part of the log index has been removed, before adding new blocks // to it. -// Note that this function assumes that the read lock is being held. +// Note that this function assumes that the index read lock is being held. 
func (f *FilterMaps) updateMatchersValidRange() { + f.matchersLock.Lock() + defer f.matchersLock.Unlock() + for fm := range f.matchers { if !f.initialized { fm.valid = false diff --git a/core/filtermaps/matcher_test.go b/core/filtermaps/matcher_test.go index 21265d5f0eaf..7754057d4b90 100644 --- a/core/filtermaps/matcher_test.go +++ b/core/filtermaps/matcher_test.go @@ -28,11 +28,11 @@ func TestMatcher(t *testing.T) { ts := newTestSetup(t) defer ts.close() - ts.chain.addBlocks(1000, 10, 10, 4, true) + ts.chain.addBlocks(100, 10, 10, 4, true) ts.setHistory(0, false) ts.fm.WaitIdle() - for i := 0; i < 500; i++ { + for i := 0; i < 5000; i++ { bhash := ts.chain.canonical[rand.Intn(len(ts.chain.canonical))] receipts := ts.chain.receipts[bhash] if len(receipts) == 0 { From b63fce0af021448711a98f79ca2bbad75043ead4 Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Sun, 6 Oct 2024 22:57:48 +0200 Subject: [PATCH 21/23] core/filtermaps: fixed comment --- core/filtermaps/math.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/filtermaps/math.go b/core/filtermaps/math.go index ca6d01c08c31..c629409048a4 100644 --- a/core/filtermaps/math.go +++ b/core/filtermaps/math.go @@ -174,8 +174,8 @@ func (p potentialMatches) Less(i, j int) bool { return p[i] < p[j] } func (p potentialMatches) Swap(i, j int) { p[i], p[j] = p[j], p[i] } // uint32ModInverse takes an odd 32 bit number and returns its modular -// multiplicative inverse (mod 2**32), meaning that for any uint32 x and odd y -// x * y * uint32ModInverse(y) == 1. +// multiplicative inverse (mod 2**32), meaning that for any odd uint32 value v +// uint32(v * uint32ModInverse(v)) == 1. 
func uint32ModInverse(v uint32) uint32 { if v&1 == 0 { panic("uint32ModInverse called with even argument") From 665ff3edaa9fd4be296296795bf7a71f422ddbdc Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Sun, 6 Oct 2024 23:01:30 +0200 Subject: [PATCH 22/23] core/filtermaps: ensure 8 byte alignment of struct fields --- core/filtermaps/matcher.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/filtermaps/matcher.go b/core/filtermaps/matcher.go index dd088455a991..316a6dff37ac 100644 --- a/core/filtermaps/matcher.go +++ b/core/filtermaps/matcher.go @@ -334,11 +334,11 @@ func mergeResults(results []potentialMatches) potentialMatches { // gives a match at X+offset. Note that matchSequence can be used recursively to // detect any log value sequence. type matchSequence struct { - params *Params - base, next matcher - offset uint64 + baseEmptyRate, nextEmptyRate uint64 // first in struct to ensure 8 byte alignment + params *Params + base, next matcher + offset uint64 // *EmptyRate == totalCount << 32 + emptyCount (atomically accessed) - baseEmptyRate, nextEmptyRate uint64 } // newMatchSequence creates a recursive sequence matcher from a list of underlying From 026d49802d6ffd3f8cbcaaa100a958377b2513b3 Mon Sep 17 00:00:00 2001 From: Zsolt Felfoldi Date: Sun, 6 Oct 2024 23:10:04 +0200 Subject: [PATCH 23/23] core/filtermaps, eth/filters: fixed linter issues --- core/filtermaps/indexer.go | 3 +-- core/filtermaps/indexer_test.go | 5 +++-- core/filtermaps/matcher.go | 1 - core/filtermaps/matcher_test.go | 5 +++-- core/filtermaps/math_test.go | 7 ++++--- eth/filters/filter_system_test.go | 1 - eth/filters/filter_test.go | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/core/filtermaps/indexer.go b/core/filtermaps/indexer.go index 9bf705cf83e5..08563f04483c 100644 --- a/core/filtermaps/indexer.go +++ b/core/filtermaps/indexer.go @@ -218,7 +218,6 @@ func (f *FilterMaps) tryUpdateHead(newHead *types.Header) bool { 
f.lastLogHeadUpdate = time.Now() } } - }() if !f.startHeadUpdate { @@ -601,7 +600,7 @@ func (f *FilterMaps) applyUpdateBatch(u *updateBatch) { } } -// updatedRangeLength returns the lenght of the updated filter map range. +// updatedRangeLength returns the length of the updated filter map range. func (u *updateBatch) updatedRangeLength() uint32 { return u.afterLastMap - u.firstMap } diff --git a/core/filtermaps/indexer_test.go b/core/filtermaps/indexer_test.go index a20b062e40dd..84e348cd423a 100644 --- a/core/filtermaps/indexer_test.go +++ b/core/filtermaps/indexer_test.go @@ -17,6 +17,7 @@ package filtermaps import ( + crand "crypto/rand" "crypto/sha256" "math/big" "math/rand" @@ -324,7 +325,7 @@ func (tc *testChain) addBlocks(count, maxTxPerBlock, maxLogsPerReceipt, maxTopic for i := range receipt.Logs { log := &types.Log{} receipt.Logs[i] = log - rand.Read(log.Address[:]) + crand.Read(log.Address[:]) var topicCount int if random { topicCount = rand.Intn(maxTopicsPerLog + 1) @@ -333,7 +334,7 @@ func (tc *testChain) addBlocks(count, maxTxPerBlock, maxLogsPerReceipt, maxTopic } log.Topics = make([]common.Hash, topicCount) for j := range log.Topics { - rand.Read(log.Topics[j][:]) + crand.Read(log.Topics[j][:]) } } gen.AddUncheckedReceipt(receipt) diff --git a/core/filtermaps/matcher.go b/core/filtermaps/matcher.go index 316a6dff37ac..593fab2743a1 100644 --- a/core/filtermaps/matcher.go +++ b/core/filtermaps/matcher.go @@ -168,7 +168,6 @@ func GetPotentialMatches(ctx context.Context, backend MatcherBackend, firstBlock close(task.done) } wg.Done() - return } for i := 0; i < 4; i++ { diff --git a/core/filtermaps/matcher_test.go b/core/filtermaps/matcher_test.go index 7754057d4b90..0f05aec1d460 100644 --- a/core/filtermaps/matcher_test.go +++ b/core/filtermaps/matcher_test.go @@ -18,6 +18,7 @@ package filtermaps import ( "context" + crand "crypto/rand" "math/rand" "testing" @@ -46,7 +47,7 @@ func TestMatcher(t *testing.T) { var ok bool addresses := 
make([]common.Address, rand.Intn(3)) for i := range addresses { - rand.Read(addresses[i][:]) + crand.Read(addresses[i][:]) } if len(addresses) > 0 { addresses[rand.Intn(len(addresses))] = log.Address @@ -56,7 +57,7 @@ func TestMatcher(t *testing.T) { for j := range topics { topics[j] = make([]common.Hash, rand.Intn(3)) for i := range topics[j] { - rand.Read(topics[j][i][:]) + crand.Read(topics[j][i][:]) } if len(topics[j]) > 0 { topics[j][rand.Intn(len(topics[j]))] = log.Topics[j] diff --git a/core/filtermaps/math_test.go b/core/filtermaps/math_test.go index 3210c833ac5e..5ab2c5aa8aee 100644 --- a/core/filtermaps/math_test.go +++ b/core/filtermaps/math_test.go @@ -17,6 +17,7 @@ package filtermaps import ( + crand "crypto/rand" "math/rand" "testing" @@ -32,7 +33,7 @@ func TestSingleMatch(t *testing.T) { mapIndex := rand.Uint32() lvIndex := uint64(mapIndex)<