Skip to content

Commit

Permalink
pack buckets into uint64
Browse files Browse the repository at this point in the history
  • Loading branch information
paulwe committed Jul 8, 2023
1 parent a4ac97c commit c94c7db
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 49 deletions.
41 changes: 18 additions & 23 deletions bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ package cuckoo
import (
"bytes"
"fmt"
"math/bits"
)

// fingerprint represents a single entry in a bucket.
// It is a 16-bit value; the value nullFp (0) denotes an empty slot.
type fingerprint uint16

// bucket keeps track of fingerprints hashing to the same index.
// Two declarations are listed here: an array of bucketSize fingerprints and a
// single uint64 holding all slots packed together ("pack buckets into uint64").
type bucket [bucketSize]fingerprint
type bucket uint64

const (
nullFp = 0
Expand All @@ -21,8 +22,8 @@ const (
// insert a fingerprint into a bucket. Returns true if there was enough space and insertion succeeded.
// Note it allows inserting the same fingerprint multiple times.
func (b *bucket) insert(fp fingerprint) bool {
if i := b.index(nullFp); i != 4 {
b[i] = fp
if i := findZeros(uint64(*b)); i != 0 {
*b |= bucket(fp) << ((bits.Len64(i)/fingerprintSizeBits - 1) * fingerprintSizeBits)
return true
}
return false
Expand All @@ -31,43 +32,37 @@ func (b *bucket) insert(fp fingerprint) bool {
// delete a fingerprint from a bucket.
// Returns true if the fingerprint was present and successfully removed.
// At most one slot is cleared per call, even if fp is stored several times.
func (b *bucket) delete(fp fingerprint) bool {
// Array-based bucket: index reports 4 when fp is not present.
if i := b.index(fp); i != 4 {
b[i] = nullFp
// uint64-packed bucket: findValue returns a mask with one marker bit per slot
// equal to fp (0 when there is none). bits.Len64 locates the highest marker,
// which is converted into that slot's shift so the slot can be masked out.
// NOTE(review): assumes fingerprintSizeBits is 16 and maxFingerprint is 0xFFFF — confirm.
if i := findValue(uint64(*b), uint16(fp)); i != 0 {
*b &= ^(maxFingerprint << ((bits.Len64(i)/fingerprintSizeBits - 1) * fingerprintSizeBits))
return true
}
return false
}

// swap stores fp in slot i of the packed bucket and returns the fingerprint
// that occupied that slot before the call.
func (b *bucket) swap(i uint64, fp fingerprint) fingerprint {
	shift := i * fingerprintSizeBits
	before := *b
	evicted := (before >> shift) & maxFingerprint
	// Clear the target slot, then place the new fingerprint into it.
	*b = before&^(maxFingerprint<<shift) | bucket(fp)<<shift
	return fingerprint(evicted)
}

// contains reports whether needle is stored in the bucket.
func (b *bucket) contains(needle fingerprint) bool {
// Array-based bucket: index returns 4 when needle is not found.
return b.index(needle) != 4
// uint64-packed bucket: findValue yields a non-zero mask when some slot equals needle.
return findValue(uint64(*b), uint16(needle)) != 0
}

// index returns the position (0 to 3) of the first slot of the array-based
// bucket that holds needle, or 4 when no slot matches.
func (b *bucket) index(needle fingerprint) uint8 {
if b[0] == needle {
return 0
}
if b[1] == needle {
return 1
}
if b[2] == needle {
return 2
}
if b[3] == needle {
return 3
}
// Sentinel: needle is not in the bucket.
return 4
// nullsCount returns how many slots of the bucket hold nullFp, i.e. are empty.
func (b *bucket) nullsCount() uint {
	// findValue sets one marker bit per slot equal to nullFp; count the markers.
	emptyMarkers := findValue(uint64(*b), nullFp)
	return uint(bits.OnesCount64(emptyMarkers))
}

// reset deletes all fingerprints in the bucket.
func (b *bucket) reset() {
// Array-based bucket: every slot is set back to nullFp.
*b = [bucketSize]fingerprint{nullFp, nullFp, nullFp, nullFp}
// uint64-packed bucket: the all-zero word leaves every slot equal to nullFp (0).
*b = 0
}

func (b *bucket) String() string {
var buf bytes.Buffer
buf.WriteString("[")
for _, by := range b {
buf.WriteString(fmt.Sprintf("%5d ", by))
for i := 3; i >= 0; i-- {
buf.WriteString(fmt.Sprintf("%5d ", ((*b)>>(i*fingerprintSizeBits))&maxFingerprint))
}
buf.WriteString("]")
return buf.String()
Expand Down
55 changes: 54 additions & 1 deletion bucket_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,65 @@ import (
func TestBucket_Reset(t *testing.T) {
// Fill a bucket, reset it, and expect it to equal the zero-value bucket.
var bkt bucket
for i := fingerprint(0); i < bucketSize; i++ {
bkt[i] = i
// i+1 keeps every inserted value non-zero (nullFp is 0).
bkt.insert(i + 1)
}

bkt.reset()

var want bucket
if !reflect.DeepEqual(bkt, want) {
t.Errorf("bucket.reset() got %v, want %v", bkt, want)
}
}

// TestBucket_Insert checks that an empty bucket accepts bucketSize
// fingerprints and then refuses one more.
func TestBucket_Insert(t *testing.T) {
	var bkt bucket
	for fp := fingerprint(1); fp <= bucketSize; fp++ {
		if ok := bkt.insert(fp); !ok {
			t.Error("bucket insert failed")
		}
	}
	// The bucket is full now, so a further insert must be rejected.
	if accepted := bkt.insert(5); accepted {
		t.Error("expected bucket insert to fail after overflow")
	}
}

// TestBucket_Delete fills a bucket, then checks for each stored fingerprint
// that it can be deleted and that the freed slot accepts it again.
func TestBucket_Delete(t *testing.T) {
	var bkt bucket
	for fp := fingerprint(1); fp <= bucketSize; fp++ {
		bkt.insert(fp)
	}

	for fp := fingerprint(1); fp <= bucketSize; fp++ {
		if removed := bkt.delete(fp); !removed {
			t.Error("bucket delete failed")
		}
		if reinserted := bkt.insert(fp); !reinserted {
			t.Error("bucket insert after delete failed")
		}
	}
}

// TestBucket_Swap inserts one fingerprint, swaps slot 3 for a new value and
// expects the old fingerprint back and the new one to be findable.
func TestBucket_Swap(t *testing.T) {
	var bkt bucket
	bkt.insert(123)

	// The test expects the first inserted fingerprint to sit in slot 3.
	prev := bkt.swap(3, 321)
	if prev != 123 {
		t.Errorf("swap returned unexpected value %d", prev)
	}
	if found := bkt.contains(321); !found {
		t.Errorf("contains after swap failed")
	}
}

// TestBucket_Contains fills a bucket and checks that every inserted
// fingerprint is reported as present.
func TestBucket_Contains(t *testing.T) {
	var bkt bucket
	for fp := fingerprint(1); fp <= bucketSize; fp++ {
		bkt.insert(fp)
	}

	for fp := fingerprint(1); fp <= bucketSize; fp++ {
		if present := bkt.contains(fp); !present {
			t.Error("bucket contains failed")
		}
	}
}
28 changes: 13 additions & 15 deletions cuckoofilter.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func NewFilter(numElements uint) *Filter {
return &Filter{
buckets: buckets,
count: 0,
bucketIndexMask: uint(len(buckets) - 1),
bucketIndexMask: numBuckets - 1,
}
}

Expand Down Expand Up @@ -73,7 +73,11 @@ func (cf *Filter) Insert(data []byte) bool {
if cf.insert(fp, i2) {
return true
}
return cf.reinsert(fp, randi(&cf.rng, i1, i2))
if cf.rng.Uint64()&1 == 0 {
return cf.reinsert(fp, i1)
} else {
return cf.reinsert(fp, i2)
}
}

func (cf *Filter) insert(fp fingerprint, i uint) bool {
Expand All @@ -86,9 +90,9 @@ func (cf *Filter) insert(fp fingerprint, i uint) bool {

func (cf *Filter) reinsert(fp fingerprint, i uint) bool {
for k := 0; k < maxCuckooKickouts; k++ {
j := cf.rng.Intn(bucketSize)
j := cf.rng.Uint64() & (bucketSize - 1)
// Swap fingerprint with bucket entry.
cf.buckets[i][j], fp = fp, cf.buckets[i][j]
fp = cf.buckets[i].swap(j, fp)

// Move kicked out fingerprint to alternate location.
i = getAltIndex(fp, i, cf.bucketIndexMask)
Expand Down Expand Up @@ -130,9 +134,7 @@ const bytesPerBucket = bucketSize * fingerprintSizeBits / 8
func (cf *Filter) Encode() []byte {
// Serialize every bucket in order, little-endian. The buffer capacity is
// reserved up front as bytesPerBucket bytes per bucket.
buf := make([]byte, 0, len(cf.buckets)*bytesPerBucket)
for _, b := range cf.buckets {
// Array-based bucket: each fingerprint is appended as a 16-bit value.
for _, fp := range b {
buf = binary.LittleEndian.AppendUint16(buf, uint16(fp))
}
// uint64-packed bucket: the whole bucket is appended as one 64-bit value.
buf = binary.LittleEndian.AppendUint64(buf, uint64(b))
}
return buf
}
Expand All @@ -152,14 +154,10 @@ func Decode(data []byte) (*Filter, error) {

var count, pos uint
buckets := make([]bucket, numBuckets)
for i, b := range buckets {
for j := range b {
buckets[i][j] = fingerprint(binary.LittleEndian.Uint16(data[pos : pos+2]))
pos += 2
if buckets[i][j] != nullFp {
count++
}
}
for i := range buckets {
buckets[i] = bucket(binary.LittleEndian.Uint64(data[pos : pos+8]))
pos += 8
count += bucketSize - buckets[i].nullsCount()
}
return &Filter{
buckets: buckets,
Expand Down
21 changes: 11 additions & 10 deletions util.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"encoding/binary"
"math/bits"

"github.com/zeebo/wyhash"
"github.com/zeebo/xxh3"
)

Expand All @@ -14,18 +13,10 @@ func init() {
b := make([]byte, 2)
for i := 0; i < maxFingerprint+1; i++ {
binary.LittleEndian.PutUint16(b, uint16(i))
altHash[i] = (uint(xxh3.Hash(b)))
altHash[i] = uint(xxh3.Hash(b))
}
}

// randi picks one of i1 and i2, deciding by the lowest bit of the next value
// drawn from rng: a clear bit selects i1, a set bit selects i2.
func randi(rng *wyhash.RNG, i1, i2 uint) uint {
	if rng.Uint64()&1 != 0 {
		return i2
	}
	return i1
}

// getAltIndex returns the alternate bucket index for fp given its current
// index i: i is XORed with the precomputed hash of fp and reduced with
// bucketIndexMask.
func getAltIndex(fp fingerprint, i uint, bucketIndexMask uint) uint {
	alt := i ^ altHash[fp]
	return alt & bucketIndexMask
}
Expand All @@ -50,3 +41,13 @@ func getIndexAndFingerprint(data []byte, bucketIndexMask uint) (uint, fingerprin
func getNextPow2(n uint64) uint {
	// Smallest power of two that is >= n: the bit length of n-1 is the exponent.
	// For n == 0 the subtraction wraps around and the shift by 64 yields 0.
	exp := bits.Len64(n - 1)
	return uint(1) << exp
}

// findZeros treats v as four 16-bit lanes and returns a mask in which the top
// bit (0x8000) of a lane is set exactly when that lane of v is zero.
// SEE: https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
func findZeros(v uint64) uint64 {
	const low15 = 0x7FFF7FFF7FFF7FFF
	// Adding low15 carries into a lane's top bit iff its low 15 bits are non-zero.
	carried := (v & low15) + low15
	// OR-ing v covers lanes whose only set bit is the top one; OR-ing low15
	// fills the remaining bits so the complement leaves just the markers.
	nonZero := carried | v | low15
	return ^nonZero
}

// findValue treats x as four 16-bit lanes and returns a mask in which the top
// bit (0x8000) of a lane is set exactly when that lane of x equals n.
// SEE: https://graphics.stanford.edu/~seander/bithacks.html#ValueInWord
func findValue(x uint64, n uint16) uint64 {
	// 0x0001000100010001: multiplying by it copies n into every lane.
	const laneOnes = ^uint64(0) / (1<<16 - 1)
	// Lanes equal to n become zero after the XOR, which findZeros then flags.
	return findZeros(x ^ (laneOnes * uint64(n)))
}

0 comments on commit c94c7db

Please sign in to comment.