Skip to content

Commit

Permalink
Transform into BitmapContainer if the container gets bigger
Browse files Browse the repository at this point in the history
  • Loading branch information
cubicYYY committed Sep 1, 2024
1 parent 478a965 commit 195760f
Show file tree
Hide file tree
Showing 19 changed files with 428 additions and 250 deletions.
2 changes: 1 addition & 1 deletion examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
add_executable(example example.cpp)

# Include directory for the example program
target_include_directories(example PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ../lib)
target_include_directories(example PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ../include)
15 changes: 15 additions & 0 deletions examples/example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,21 @@ int main() {

cout << a.count() << endl;
assert(a.count() == 4);
assert(a == a);

froaring::FlexibleRoaringBitmap<> x, y;
x.set(1);
x.set(2);
x.set(3);
x.set(114514);
x.set(1919810);
x.reset(2);
y.set(1);
y.set(3);
y.set(114514);
y.set(1919810);
assert(x == y);

cout << (a == a) << endl;
cout << sizeof(a) << endl;
cout << sizeof(std::vector<int>) << endl;
Expand Down
3 changes: 3 additions & 0 deletions include/api.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#pragma once

#include "froaring_api/equal.h"
2 changes: 1 addition & 1 deletion lib/array_container.h → include/array_container.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class ArrayContainer : public froaring_container_t {
}

ArrayContainer(SizeType capacity = ARRAY_CONTAINER_INIT_CAPACITY, SizeType size = 0)
: capacity(capacity),
: capacity(std::max(capacity, size)),
size(size),
vals(static_cast<IndexOrNumType*>(malloc(capacity * sizeof(IndexOrNumType)))) {
assert(vals && "Failed to allocate memory for ArrayContainer");
Expand Down
98 changes: 50 additions & 48 deletions lib/binsearch_index.h → include/binsearch_index.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#pragma once

#include "froaring_api/equal.h"
#include "handle.h"
#include "utils.h"
#include "transform.h"

namespace froaring {

Expand All @@ -28,6 +29,13 @@ class BinsearchIndex : public froaring_container_t {
using CTy = froaring::ContainerType;
using ContainerHandle = froaring::ContainerHandle<IndexType>;

/// Number of distinct values (bit positions) a single indexed container
/// covers: 2^DataBits.
/// NOTE(review): the shift is evaluated in `int`, so this presumably relies
/// on DataBits being small enough not to overflow `int` — confirm.
static constexpr size_t ContainerCapacity = (1 << DataBits);
/// Element count at which an array container stops being the preferred
/// representation. The insertion path converts an array container into a
/// bitmap container once its size reaches this value.
static constexpr size_t ArrayToBitmapCountThreshold = ContainerCapacity / DataBits;
/// Run count above which an RLE container is presumably no longer worth
/// keeping over a bitmap (the extra factor of 2 suggests two DataBits-wide
/// fields per run — TODO confirm against the RLE container layout).
static constexpr size_t RleToBitmapRunThreshold = ContainerCapacity / (DataBits * 2);
public:

BinsearchIndex(SizeType capacity = CONTAINERS_INIT_CAPACITY, SizeType size = 0)
Expand All @@ -37,10 +45,29 @@ class BinsearchIndex : public froaring_container_t {
assert(containers && "Failed to allocate memory for containers");
}

void debug_print() const {
for (SizeType i = 0; i < size; ++i) {
std::cout << "Index: " << containers[i].index << " Type: " << static_cast<int>(containers[i].type) << std::endl;
switch (containers[i].type) {
case CTy::RLE:
CAST_TO_RLE(containers[i].ptr)->debug_print();
break;
case CTy::Array:
CAST_TO_ARRAY(containers[i].ptr)->debug_print();
break;
case CTy::Bitmap:
CAST_TO_BITMAP(containers[i].ptr)->debug_print();
break;
default:
FROARING_UNREACHABLE
}
}
}

/// Return the position of the entry whose index equals `index` if it exists;
/// otherwise return the lower-bound position (where such an entry would be
/// inserted).
///
SizeType getContainerPosByIndex(IndexType index) const {
SizeType lower_bound(IndexType index) const {
SizeType left = 0;
SizeType right = size;

Expand Down Expand Up @@ -72,10 +99,9 @@ class BinsearchIndex : public froaring_container_t {
can_fit_t<DataBits> data;
num2index_n_data<IndexBits, DataBits>(value, index, data);

SizeType entry_pos = getContainerPosByIndex(index);
SizeType entry_pos = lower_bound(index);

if (entry_pos == size) return false;
if (containers[entry_pos].index != index) return false;
if (entry_pos == size || containers[entry_pos].index != index) return false;

// Now we found the corresponding container
switch (containers[entry_pos].type) {
Expand All @@ -97,50 +123,41 @@ class BinsearchIndex : public froaring_container_t {
can_fit_t<DataBits> data;
num2index_n_data<IndexBits, DataBits>(value, index, data);

SizeType pos = getContainerPosByIndex(index);

// Not found, insert a new container at the end:
if (pos == size) {
if (size == capacity) {
expand();
}

auto array_ptr = new ArraySized(ARRAY_CONTAINER_INIT_CAPACITY, 1);
array_ptr->vals[0] = data;
containers[size] = ContainerHandle(array_ptr, CTy::Array, index);
size++;
return;
}
SizeType pos = lower_bound(index);

// Not found, insert a new container in the middle:
if (pos < size && containers[pos].index != index) {
// Not found, insert a new container:
if (pos == size || containers[pos].index != index) {
if (size == capacity) {
expand();
}

std::memmove(&containers[pos + 2], &containers[pos + 1], (size - pos - 1) * sizeof(ContainerHandle));
std::memmove(&containers[pos + 1], &containers[pos], (size - pos) * sizeof(ContainerHandle));
auto array_ptr = new ArraySized(ARRAY_CONTAINER_INIT_CAPACITY, 1);
array_ptr->vals[0] = data;
containers[pos + 1] = ContainerHandle(array_ptr, CTy::Array, index);
containers[pos] = ContainerHandle(array_ptr, CTy::Array, index);
size++;
return;
}

ContainerHandle& entry = containers[pos];
assert(entry.index == index && "??? Wrong container found or created");

// Now we found the corresponding container
switch (entry.type) {
switch (containers[pos].type) {
case CTy::RLE: {
CAST_TO_RLE(entry.ptr)->set(data);
CAST_TO_RLE(containers[pos].ptr)->set(data);
break;
}
case CTy::Array: {
CAST_TO_ARRAY(entry.ptr)->set(data);
auto array_ptr = CAST_TO_ARRAY(containers[pos].ptr);
array_ptr->set(data);
// Transform into a bitmap container if it gets bigger
if (array_ptr->size >= ArrayToBitmapCountThreshold) {
auto new_bitmap = froaring_array_to_bitmap<WordType, DataBits>(array_ptr);
delete containers[pos].ptr;
containers[pos].ptr = new_bitmap;
containers[pos].type = CTy::Bitmap;
}
break;
}
case CTy::Bitmap: {
CAST_TO_BITMAP(entry.ptr)->set(data);
CAST_TO_BITMAP(containers[pos].ptr)->set(data);
break;
}
default:
Expand Down Expand Up @@ -175,7 +192,7 @@ class BinsearchIndex : public froaring_container_t {
can_fit_t<DataBits> data;
num2index_n_data<IndexBits, DataBits>(value, index, data);

SizeType pos = getContainerPosByIndex(index);
SizeType pos = lower_bound(index);
if (pos == size || containers[pos].index != index) { // not found: return directly
return;
}
Expand All @@ -187,7 +204,7 @@ class BinsearchIndex : public froaring_container_t {
switch (entry.type) {
case CTy::RLE: {
CAST_TO_RLE(entry.ptr)->reset(data);
if (CAST_TO_RLE(entry.ptr)->size == 0) {
if (CAST_TO_RLE(entry.ptr)->run_count == 0) {
delete entry.ptr;
if (size > 1) {
std::memmove(&containers[pos], &containers[pos + 1],
Expand Down Expand Up @@ -226,21 +243,6 @@ class BinsearchIndex : public froaring_container_t {
}
}

/// Equality of two indexes: same number of containers, identical container
/// indexes position by position, and pairwise-equal container contents as
/// decided by `froaring_equal`.
bool operator==(const BinsearchIndex& other) const {
    if (size != other.size) {
        return false;
    }

    // Pass 1: cheap rejection by comparing only the container indexes.
    SizeType pos = 0;
    while (pos < size) {
        if (containers[pos].index != other.containers[pos].index) {
            return false;
        }
        ++pos;
    }

    // Pass 2: compare the actual contents of each container pair.
    for (pos = 0; pos < size; ++pos) {
        auto same = froaring_equal(containers[pos].ptr, other.containers[pos].ptr, containers[pos].type,
                                   other.containers[pos].type);
        if (!same) {
            return false;
        }
    }
    return true;
}


// Release all containers
~BinsearchIndex() {
Expand Down
42 changes: 37 additions & 5 deletions lib/bitmap_container.h → include/bitmap_container.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@ class BitmapContainer : public froaring_container_t {
public:
static constexpr size_t BitsPerWord = 8 * sizeof(WordType);
static constexpr size_t TotalBits = (1 << DataBits);
static constexpr size_t WordCount = (TotalBits + BitsPerWord - 1) / BitsPerWord; // ceiling
static constexpr size_t WordsCount = (TotalBits + BitsPerWord - 1) / BitsPerWord; // ceiling
using NumType = froaring::can_fit_t<DataBits>;
using IndexType = froaring::can_fit_t<DataBits - BitsPerWord>;
using SizeType = froaring::can_fit_t<DataBits + 1>;
static constexpr WordType IndexInsideWordMask = (1 << cexpr_log2(BitsPerWord)) - 1;

public:
BitmapContainer() { memset(words, 0, sizeof(words)); }
Expand All @@ -27,17 +28,17 @@ class BitmapContainer : public froaring_container_t {
/// Release `obj` with a `delete` expression, so `obj` must have been
/// obtained from a matching `new` (or be null).
static void destroy(BitmapContainer* obj) {
    delete obj;
}

void debug_print() const {
for (size_t i = 0; i < WordCount; ++i) {
for (size_t i = 0; i < WordsCount; ++i) {
// std::cout << words[i] << " ";
WordType w = words[i];
WordType t = w & (~w + 1);
WordType r = i * sizeof(WordType) + std::countr_zero(w);
auto r = i * sizeof(WordType) + std::countr_zero(w);
std::cout << r << " ";
}
std::cout << std::endl;
}

void clear() { std::memset(words.data(), 0, WordCount * sizeof(WordType)); }
/// Clear every bit in the container.
void clear() {
    // `words` is a plain C array, which has no `.data()` member; pass the
    // array itself and let `sizeof` cover its full extent, exactly as the
    // constructor does.
    std::memset(words, 0, sizeof(words));
}

/// Turn on the bit for `index`: pick the word that holds it, then OR in a
/// mask with only that bit's position set.
void set(NumType index) {
    const auto word_pos = index / BitsPerWord;
    const auto bit_pos = index % BitsPerWord;
    words[word_pos] |= (static_cast<WordType>(1) << bit_pos);
}

Expand All @@ -52,6 +53,37 @@ class BitmapContainer : public froaring_container_t {

/// Turn off the bit for `index`: pick the word that holds it, then AND with
/// the complement of a mask that has only that bit's position set.
void reset(NumType index) {
    const auto word_pos = index / BitsPerWord;
    const auto bit_pos = index % BitsPerWord;
    words[word_pos] &= ~(static_cast<WordType>(1) << bit_pos);
}

/// Check whether every bit in the half-open range [start, end) is set.
///
/// @param start First bit position of the range (inclusive).
/// @param end   One past the last bit position of the range (exclusive).
/// @return `true` if the range is empty (`start >= end`) or all of its bits
///         are set; `false` if any bit is clear or the range reaches past
///         the last bit of the container.
bool containesRange(IndexType start, IndexType end) const {
    if (start >= end) return true;  // an empty range is trivially contained

    const size_t start_bit = static_cast<size_t>(start);
    const size_t end_bit = static_cast<size_t>(end);

    // A range that extends beyond the container can never be fully present.
    // Checking this up front also guarantees every word access below is in
    // bounds.
    if (end_bit > TotalBits) return false;

    // Build masks in WordType: shifting an `int` (or a narrower SizeType) by
    // up to BitsPerWord - 1 would be undefined once the word is wider than
    // the shifted type.
    constexpr WordType all_ones = static_cast<WordType>(~WordType{0});

    const size_t start_word = start_bit / BitsPerWord;
    const size_t end_word = end_bit / BitsPerWord;
    const size_t start_off = start_bit % BitsPerWord;
    const size_t end_off = end_bit % BitsPerWord;

    // All "1" from `start` up to the MSB of its word.
    const WordType first_mask = static_cast<WordType>(all_ones << start_off);
    // All "1" from the LSB up to (but excluding) `end` within its word;
    // empty when `end` is word-aligned.
    const WordType last_mask = static_cast<WordType>((WordType{1} << end_off) - 1);

    if (start_word == end_word) {
        // start < end inside one word implies end_off > 0, so this word
        // exists (end_bit <= TotalBits).
        const WordType mask = static_cast<WordType>(first_mask & last_mask);
        return (words[start_word] & mask) == mask;
    }

    // Leading partial word.
    if ((words[start_word] & first_mask) != first_mask) {
        return false;
    }

    // Whole words strictly between the first and the last.
    for (size_t i = start_word + 1; i < end_word; ++i) {
        if (words[i] != all_ones) {
            return false;
        }
    }

    // Trailing partial word. When `end` is word-aligned nothing is required
    // from `words[end_word]`, which may be one past the array if the range
    // runs to the very end of the container.
    if (end_off != 0 && (words[end_word] & last_mask) != last_mask) {
        return false;
    }

    return true;
}
SizeType cardinality() const {
SizeType count = 0;
for (const auto& word : words) {
Expand All @@ -61,6 +93,6 @@ class BitmapContainer : public froaring_container_t {
}

public:
WordType words[WordCount];
WordType words[WordsCount];
};
} // namespace froaring
Loading

0 comments on commit 195760f

Please sign in to comment.