From c94c7db57149f7b5b8e4d14da8c4ccaaf8453f49 Mon Sep 17 00:00:00 2001 From: paulwe Date: Fri, 7 Jul 2023 17:35:27 -0700 Subject: [PATCH] pack buckets into uint64 --- bucket.go | 41 ++++++++++++++++-------------------- bucket_test.go | 55 ++++++++++++++++++++++++++++++++++++++++++++++++- cuckoofilter.go | 28 ++++++++++++------------- util.go | 21 ++++++++++--------- 4 files changed, 96 insertions(+), 49 deletions(-) diff --git a/bucket.go b/bucket.go index 7aa3629..87b8e1c 100644 --- a/bucket.go +++ b/bucket.go @@ -3,13 +3,14 @@ package cuckoo import ( "bytes" "fmt" + "math/bits" ) // fingerprint represents a single entry in a bucket. type fingerprint uint16 // bucket keeps track of fingerprints hashing to the same index. -type bucket [bucketSize]fingerprint +type bucket uint64 const ( nullFp = 0 @@ -21,8 +22,8 @@ const ( // insert a fingerprint into a bucket. Returns true if there was enough space and insertion succeeded. // Note it allows inserting the same fingerprint multiple times. func (b *bucket) insert(fp fingerprint) bool { - if i := b.index(nullFp); i != 4 { - b[i] = fp + if i := findZeros(uint64(*b)); i != 0 { + *b |= bucket(fp) << ((bits.Len64(i)/fingerprintSizeBits - 1) * fingerprintSizeBits) return true } return false @@ -31,43 +32,37 @@ func (b *bucket) insert(fp fingerprint) bool { // delete a fingerprint from a bucket. // Returns true if the fingerprint was present and successfully removed. func (b *bucket) delete(fp fingerprint) bool { - if i := b.index(fp); i != 4 { - b[i] = nullFp + if i := findValue(uint64(*b), uint16(fp)); i != 0 { + *b &= ^(maxFingerprint << ((bits.Len64(i)/fingerprintSizeBits - 1) * fingerprintSizeBits)) return true } return false } +func (b *bucket) swap(i uint64, fp fingerprint) fingerprint { + p := (*b) >> (i * fingerprintSizeBits) & maxFingerprint + *b = (*b) & ^(maxFingerprint<<(i*fingerprintSizeBits)) | (bucket(fp) << (i * fingerprintSizeBits)) + return fingerprint(p) +} + func (b *bucket) contains(needle fingerprint) bool { - return b.index(needle) != 4 + return findValue(uint64(*b), uint16(needle)) != 0 } -func (b *bucket) index(needle fingerprint) uint8 { - if b[0] == needle { - return 0 - } - if b[1] == needle { - return 1 - } - if b[2] == needle { - return 2 - } - if b[3] == needle { - return 3 - } - return 4 +func (b *bucket) nullsCount() uint { + return uint(bits.OnesCount64(findValue(uint64(*b), nullFp))) } // reset deletes all fingerprints in the bucket. func (b *bucket) reset() { - *b = [bucketSize]fingerprint{nullFp, nullFp, nullFp, nullFp} + *b = 0 } func (b *bucket) String() string { var buf bytes.Buffer buf.WriteString("[") - for _, by := range b { - buf.WriteString(fmt.Sprintf("%5d ", by)) + for i := 3; i >= 0; i-- { + buf.WriteString(fmt.Sprintf("%5d ", ((*b)>>(i*fingerprintSizeBits))&maxFingerprint)) } buf.WriteString("]") return buf.String() diff --git a/bucket_test.go b/bucket_test.go index ea57baa..452027e 100644 --- a/bucket_test.go +++ b/bucket_test.go @@ -8,8 +8,9 @@ import ( func TestBucket_Reset(t *testing.T) { var bkt bucket for i := fingerprint(0); i < bucketSize; i++ { - bkt[i] = i + bkt.insert(i + 1) } + bkt.reset() var want bucket @@ -17,3 +18,55 @@ func TestBucket_Reset(t *testing.T) { t.Errorf("bucket.reset() got %v, want %v", bkt, want) } } + +func TestBucket_Insert(t *testing.T) { + var bkt bucket + for i := fingerprint(0); i < bucketSize; i++ { + if !bkt.insert(i + 1) { + t.Error("bucket insert failed") + } + } + if bkt.insert(5) { + t.Error("expected bucket insert to fail after overflow") + } +} + +func TestBucket_Delete(t *testing.T) { + var bkt bucket + for i := fingerprint(0); i < bucketSize; i++ { + bkt.insert(i + 1) + } + + for i := fingerprint(0); i < bucketSize; i++ { + if !bkt.delete(i + 1) { + t.Error("bucket delete failed") + } + if !bkt.insert(i + 1) { + t.Error("bucket insert after delete failed") + } + } +} + +func TestBucket_Swap(t *testing.T) { + var bkt bucket + bkt.insert(123) + if prev := bkt.swap(3, 321); prev != 123 { + t.Errorf("swap returned unexpected value %d", prev) + } + if !bkt.contains(321) { + t.Errorf("contains after swap failed") + } +} + +func TestBucket_Contains(t *testing.T) { + var bkt bucket + for i := fingerprint(0); i < bucketSize; i++ { + bkt.insert(i + 1) + } + + for i := fingerprint(0); i < bucketSize; i++ { + if !bkt.contains(i + 1) { + t.Error("bucket contains failed") + } + } +} diff --git a/cuckoofilter.go b/cuckoofilter.go index 2f9255c..02bb46c 100644 --- a/cuckoofilter.go +++ b/cuckoofilter.go @@ -37,7 +37,7 @@ func NewFilter(numElements uint) *Filter { return &Filter{ buckets: buckets, count: 0, - bucketIndexMask: uint(len(buckets) - 1), + bucketIndexMask: numBuckets - 1, } } @@ -73,7 +73,11 @@ func (cf *Filter) Insert(data []byte) bool { if cf.insert(fp, i2) { return true } - return cf.reinsert(fp, randi(&cf.rng, i1, i2)) + if cf.rng.Uint64()&1 == 0 { + return cf.reinsert(fp, i1) + } else { + return cf.reinsert(fp, i2) + } } func (cf *Filter) insert(fp fingerprint, i uint) bool { @@ -86,9 +90,9 @@ func (cf *Filter) insert(fp fingerprint, i uint) bool { func (cf *Filter) reinsert(fp fingerprint, i uint) bool { for k := 0; k < maxCuckooKickouts; k++ { - j := cf.rng.Intn(bucketSize) + j := cf.rng.Uint64() & (bucketSize - 1) // Swap fingerprint with bucket entry. - cf.buckets[i][j], fp = fp, cf.buckets[i][j] + fp = cf.buckets[i].swap(j, fp) // Move kicked out fingerprint to alternate location. i = getAltIndex(fp, i, cf.bucketIndexMask) @@ -130,9 +134,7 @@ const bytesPerBucket = bucketSize * fingerprintSizeBits / 8 func (cf *Filter) Encode() []byte { buf := make([]byte, 0, len(cf.buckets)*bytesPerBucket) for _, b := range cf.buckets { - for _, fp := range b { - buf = binary.LittleEndian.AppendUint16(buf, uint16(fp)) - } + buf = binary.LittleEndian.AppendUint64(buf, uint64(b)) } return buf } @@ -152,14 +154,10 @@ func Decode(data []byte) (*Filter, error) { var count, pos uint buckets := make([]bucket, numBuckets) - for i, b := range buckets { - for j := range b { - buckets[i][j] = fingerprint(binary.LittleEndian.Uint16(data[pos : pos+2])) - pos += 2 - if buckets[i][j] != nullFp { - count++ - } - } + for i := range buckets { + buckets[i] = bucket(binary.LittleEndian.Uint64(data[pos : pos+8])) + pos += 8 + count += bucketSize - buckets[i].nullsCount() } return &Filter{ buckets: buckets, diff --git a/util.go b/util.go index 1ec2830..4393a97 100644 --- a/util.go +++ b/util.go @@ -4,7 +4,6 @@ import ( "encoding/binary" "math/bits" - "github.com/zeebo/wyhash" "github.com/zeebo/xxh3" ) @@ -14,18 +13,10 @@ func init() { b := make([]byte, 2) for i := 0; i < maxFingerprint+1; i++ { binary.LittleEndian.PutUint16(b, uint16(i)) - altHash[i] = (uint(xxh3.Hash(b))) + altHash[i] = uint(xxh3.Hash(b)) } } -// randi returns either i1 or i2 randomly. -func randi(rng *wyhash.RNG, i1, i2 uint) uint { - if rng.Uint64()&1 == 0 { - return i1 - } - return i2 -} - func getAltIndex(fp fingerprint, i uint, bucketIndexMask uint) uint { return (i ^ altHash[fp]) & bucketIndexMask } @@ -50,3 +41,13 @@ func getIndexAndFingerprint(data []byte, bucketIndexMask uint) (uint, fingerprin func getNextPow2(n uint64) uint { return uint(1 << bits.Len64(n-1)) } + +// SEE: https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord +func findZeros(v uint64) uint64 { + return ^((((v & 0x7FFF7FFF7FFF7FFF) + 0x7FFF7FFF7FFF7FFF) | v) | 0x7FFF7FFF7FFF7FFF) +} + +// SEE: https://graphics.stanford.edu/~seander/bithacks.html#ValueInWord +func findValue(x uint64, n uint16) uint64 { + return findZeros(x ^ (^uint64(0) / (1<<16 - 1) * uint64(n))) +}