Skip to content

Commit

Permalink
Merge branch 'master' into remove-bsi-allocs
Browse files Browse the repository at this point in the history
  • Loading branch information
lemire authored Aug 24, 2023
2 parents f008344 + 952b765 commit d672d44
Show file tree
Hide file tree
Showing 10 changed files with 255 additions and 84 deletions.
22 changes: 6 additions & 16 deletions BitSliceIndexing/bsi.go
Original file line number Diff line number Diff line change
Expand Up @@ -753,27 +753,17 @@ func (b *BSI) ClearValues(foundSet *roaring.Bitmap) {
wg.Wait()
}

// NewBSIRetainSet - Construct a new BSI from a clone of existing BSI, retain only values contained in foundSet
// NewBSIRetainSet - Construct a new BSI from a clone of existing BSI, retain only values contained
// in foundSet
func (b *BSI) NewBSIRetainSet(foundSet *roaring.Bitmap) *BSI {

newBSI := NewBSI(b.MaxValue, b.MinValue)
newBSI.bA = make([]*roaring.Bitmap, b.BitCount())
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
newBSI.eBM = b.eBM.Clone()
newBSI.eBM.And(foundSet)
}()
newBSI.eBM = b.eBM.Clone()
newBSI.eBM.And(foundSet)
for i := 0; i < b.BitCount(); i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
newBSI.bA[j] = b.bA[j].Clone()
newBSI.bA[j].And(foundSet)
}(i)
newBSI.bA[i] = b.bA[i].Clone()
newBSI.bA[i].And(foundSet)
}
wg.Wait()
return newBSI
}

Expand Down
1 change: 0 additions & 1 deletion BitSliceIndexing/bsi_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package roaring

import (
_ "fmt"
"github.com/RoaringBitmap/roaring"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down
46 changes: 39 additions & 7 deletions internal/byte_input.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,25 @@ type ByteBuffer struct {
off int
}

// NewByteBuffer creates a new ByteBuffer.
func NewByteBuffer(buf []byte) *ByteBuffer {
return &ByteBuffer{
buf: buf,
}
}

var _ io.Reader = (*ByteBuffer)(nil)

// Read implements io.Reader.
func (b *ByteBuffer) Read(p []byte) (int, error) {
data, err := b.Next(len(p))
if err != nil {
return 0, err
}
copy(p, data)
return len(data), nil
}

// Next returns a slice containing the next n bytes from the reader
// If there are fewer bytes than the given n, io.ErrUnexpectedEOF will be returned
func (b *ByteBuffer) Next(n int) ([]byte, error) {
Expand Down Expand Up @@ -109,26 +128,39 @@ func (b *ByteBuffer) Reset(buf []byte) {
type ByteInputAdapter struct {
r io.Reader
readBytes int
buf [4]byte
}

var _ io.Reader = (*ByteInputAdapter)(nil)

// Read implements io.Reader.
func (b *ByteInputAdapter) Read(buf []byte) (int, error) {
m, err := io.ReadAtLeast(b.r, buf, len(buf))
b.readBytes += m

if err != nil {
return 0, err
}

return m, nil
}

// Next returns a slice containing the next n bytes from the buffer,
// advancing the buffer as if the bytes had been returned by Read.
func (b *ByteInputAdapter) Next(n int) ([]byte, error) {
buf := make([]byte, n)
m, err := io.ReadAtLeast(b.r, buf, n)
b.readBytes += m
_, err := b.Read(buf)

if err != nil {
return nil, err
}

return buf, nil
}

// ReadUInt32 reads uint32 with LittleEndian order
func (b *ByteInputAdapter) ReadUInt32() (uint32, error) {
buf, err := b.Next(4)

buf := b.buf[:4]
_, err := b.Read(buf)
if err != nil {
return 0, err
}
Expand All @@ -138,8 +170,8 @@ func (b *ByteInputAdapter) ReadUInt32() (uint32, error) {

// ReadUInt16 reads uint16 with LittleEndian order
func (b *ByteInputAdapter) ReadUInt16() (uint16, error) {
buf, err := b.Next(2)

buf := b.buf[:2]
_, err := b.Read(buf)
if err != nil {
return 0, err
}
Expand Down
45 changes: 28 additions & 17 deletions roaring.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ func (rb *Bitmap) ToBytes() ([]byte, error) {
func (rb *Bitmap) Checksum() uint64 {
const (
offset = 14695981039346656037
prime = 1099511628211
prime = 1099511628211
)

var bytes []byte
Expand Down Expand Up @@ -106,6 +106,14 @@ func (rb *Bitmap) Checksum() uint64 {
return hash
}

// FromUnsafeBytes reads a serialized version of this bitmap from the byte buffer without copy.
// It is the caller's responsibility to ensure that the input data is not modified and remains valid for the entire lifetime of this bitmap.
// This method avoids small allocations but holds references to the input data buffer. It is GC-friendly, but it may consume more memory eventually.
func (rb *Bitmap) FromUnsafeBytes(data []byte, cookieHeader ...byte) (p int64, err error) {
stream := internal.NewByteBuffer(data)
return rb.ReadFrom(stream)
}

// ReadFrom reads a serialized version of this bitmap from stream.
// The format is compatible with other RoaringBitmap
// implementations (Java, C) and is documented here:
Expand All @@ -114,12 +122,18 @@ func (rb *Bitmap) Checksum() uint64 {
// So add cookieHeader to accept the 4-byte data that has been read in roaring64.ReadFrom.
// It is not necessary to pass cookieHeader when call roaring.ReadFrom to read the roaring32 data directly.
func (rb *Bitmap) ReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err error) {
stream := internal.ByteInputAdapterPool.Get().(*internal.ByteInputAdapter)
stream.Reset(reader)
stream, ok := reader.(internal.ByteInput)
if !ok {
byteInputAdapter := internal.ByteInputAdapterPool.Get().(*internal.ByteInputAdapter)
byteInputAdapter.Reset(reader)
stream = byteInputAdapter
}

p, err = rb.highlowcontainer.readFrom(stream, cookieHeader...)
internal.ByteInputAdapterPool.Put(stream)

if !ok {
internal.ByteInputAdapterPool.Put(stream.(*internal.ByteInputAdapter))
}
return
}

Expand All @@ -144,7 +158,6 @@ func (rb *Bitmap) ReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err
// bitmap derived from this bitmap (e.g., via Or, And) might
// also be broken. Thus, before making buf unavailable, you should
// call CloneCopyOnWriteContainers on all such bitmaps.
//
func (rb *Bitmap) FromBuffer(buf []byte) (p int64, err error) {
stream := internal.ByteBufferPool.Get().(*internal.ByteBuffer)
stream.Reset(buf)
Expand Down Expand Up @@ -276,9 +289,9 @@ type intIterator struct {
// This way, instead of making up-to 64k allocations per full iteration
// we get a single allocation and simply reinitialize the appropriate
// iterator and point to it in the generic `iter` member on each key bound.
shortIter shortIterator
runIter runIterator16
bitmapIter bitmapContainerShortIterator
shortIter shortIterator
runIter runIterator16
bitmapIter bitmapContainerShortIterator
}

// HasNext returns true if there are more integers to iterate over
Expand Down Expand Up @@ -341,7 +354,6 @@ func (ii *intIterator) AdvanceIfNeeded(minval uint32) {
// IntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap)
type IntIterator = intIterator


// Initialize configures the existing iterator so that it can iterate through the values of
// the provided bitmap.
// The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove).
Expand All @@ -357,9 +369,9 @@ type intReverseIterator struct {
iter shortIterable
highlowcontainer *roaringArray

shortIter reverseIterator
runIter runReverseIterator16
bitmapIter reverseBitmapContainerShortIterator
shortIter reverseIterator
runIter runReverseIterator16
bitmapIter reverseBitmapContainerShortIterator
}

// HasNext returns true if there are more integers to iterate over
Expand Down Expand Up @@ -434,9 +446,9 @@ type manyIntIterator struct {
iter manyIterable
highlowcontainer *roaringArray

shortIter shortIterator
runIter runIterator16
bitmapIter bitmapContainerManyIterator
shortIter shortIterator
runIter runIterator16
bitmapIter bitmapContainerManyIterator
}

func (ii *manyIntIterator) init() {
Expand Down Expand Up @@ -495,7 +507,6 @@ func (ii *manyIntIterator) NextMany64(hs64 uint64, buf []uint64) int {
return n
}


// ManyIntIterator is meant to allow you to iterate through the values of a bitmap, see Initialize(a *Bitmap)
type ManyIntIterator = manyIntIterator

Expand Down Expand Up @@ -569,7 +580,7 @@ func (rb *Bitmap) Iterate(cb func(x uint32) bool) {
// Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order;
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
func (rb *Bitmap) Iterator() IntPeekable {
p := new(intIterator)
p := new(intIterator)
p.Initialize(rb)
return p
}
Expand Down
2 changes: 1 addition & 1 deletion roaring64/aggregation64_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ func TestParAggregations2(t *testing.T) {
orFunc := func(bitmaps ...*Bitmap) *Bitmap {
return ParOr(0, bitmaps...)
}
t.Run(fmt.Sprintf("par0"), func(t *testing.T) {
t.Run("par0", func(t *testing.T) {
testAggregations(t, nil, orFunc, nil)
})
}
Expand Down
44 changes: 21 additions & 23 deletions roaring64/bsi64.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,11 @@ func (b *BSI) GetCardinality() uint64 {

// BitCount returns the number of bits needed to represent values.
func (b *BSI) BitCount() int {

return len(b.bA)
}

// SetValue sets a value for a given columnID.
func (b *BSI) SetValue(columnID uint64, value int64) {

// If max/min values are set to zero then automatically determine bit array size
if b.MaxValue == 0 && b.MinValue == 0 {
minBits := bits.Len64(uint64(value))
Expand All @@ -96,36 +94,28 @@ func (b *BSI) SetValue(columnID uint64, value int64) {
}
}

var wg sync.WaitGroup

for i := 0; i < b.BitCount(); i++ {
wg.Add(1)
go func(j int) {
defer wg.Done()
if uint64(value)&(1<<uint64(j)) > 0 {
b.bA[j].Add(uint64(columnID))
} else {
b.bA[j].Remove(uint64(columnID))
}
}(i)
if uint64(value)&(1<<uint64(i)) > 0 {
b.bA[i].Add(columnID)
} else {
b.bA[i].Remove(columnID)
}
}
wg.Wait()
b.eBM.Add(uint64(columnID))
b.eBM.Add(columnID)
}

// GetValue gets the value at the column ID. Second param will be false for non-existent values.
func (b *BSI) GetValue(columnID uint64) (int64, bool) {
value := int64(0)
exists := b.eBM.Contains(uint64(columnID))
// GetValue gets the value at the column ID. Second param will be false for non-existent values.
func (b *BSI) GetValue(columnID uint64) (value int64, exists bool) {
exists = b.eBM.Contains(columnID)
if !exists {
return value, exists
return
}
for i := 0; i < b.BitCount(); i++ {
if b.bA[i].Contains(uint64(columnID)) {
value |= (1 << uint64(i))
if b.bA[i].Contains(columnID) {
value |= 1 << i
}
}
return int64(value), exists
return
}

type action func(t *task, batch []uint64, resultsChan chan *Bitmap, wg *sync.WaitGroup)
Expand Down Expand Up @@ -911,3 +901,11 @@ func (b *BSI) Equals(other *BSI) bool {
}
return true
}

func (b *BSI) GetSizeInBytes() int {
size := b.eBM.GetSizeInBytes()
for _, bm := range b.bA {
size += bm.GetSizeInBytes()
}
return int(size)
}
1 change: 0 additions & 1 deletion roaring64/bsi64_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"bytes"
"encoding/binary"
"fmt"
_ "fmt"
"io"
"io/ioutil"
"math/rand"
Expand Down
Loading

0 comments on commit d672d44

Please sign in to comment.