-
Notifications
You must be signed in to change notification settings - Fork 3
/
kmv.go
62 lines (50 loc) · 1.19 KB
/
kmv.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
package gokmv
import (
"math"
"math/rand"
"time"
adaptivetable "github.com/positiveblue/adaptive-table"
murmur3 "github.com/spaolacci/murmur3"
)
type KMV struct {
table adaptivetable.AdaptiveTable
initialSize int
seed uint32
totalCounter uint64
}
func NewKMV(size int) *KMV {
rand.Seed(time.Now().UnixNano())
return NewKMVWithSeed(size, rand.Uint32())
}
func NewKMVWithSeed(size int, seed uint32) *KMV {
return &KMV{
table: adaptivetable.NewAdaptiveTableComplete(size, math.MaxInt64, size),
initialSize: size,
seed: seed,
totalCounter: 0,
}
}
func (kmv *KMV) ElementsAdded() uint64 {
return kmv.totalCounter
}
func (kmv *KMV) Size() int {
return kmv.table.Size()
}
func (kmv *KMV) Seed() uint32 {
return kmv.seed
}
func (kmv *KMV) InsertUint64(hash uint64) {
kmv.totalCounter++
kmv.table.Insert(hash)
}
func (kmv *KMV) InsertString(s string) {
hash := murmur3.Sum64WithSeed([]byte(s), kmv.seed)
kmv.InsertUint64(hash)
}
func (kmv *KMV) EstimateCardinality() uint64 {
if kmv.Size() < kmv.initialSize {
return uint64(kmv.table.Size())
}
meanDistance := kmv.table.Max() / uint64(kmv.table.Size())
return uint64(math.MaxUint64 / meanDistance)
}