From 195760fecc89381014344d55c30161fdd8604de9 Mon Sep 17 00:00:00 2001 From: cubicYYY Date: Sun, 1 Sep 2024 22:25:42 +0800 Subject: [PATCH] Transform into BitmapContainer if the container gets bigger --- examples/CMakeLists.txt | 2 +- examples/example.cpp | 15 +++ include/api.h | 3 + {lib => include}/array_container.h | 2 +- {lib => include}/binsearch_index.h | 98 +++++++++--------- {lib => include}/bitmap_container.h | 42 +++++++- {lib => include}/froaring.h | 80 ++++++++++---- include/froaring_api/equal.h | 143 ++++++++++++++++++++++++++ {lib => include}/froaring_api/reset.h | 0 {lib => include}/froaring_api/set.h | 0 {lib => include}/froaring_api/test.h | 0 {lib => include}/handle.h | 1 + {lib => include}/prelude.h | 9 ++ {lib => include}/rle_container.h | 101 +++++++++--------- include/transform.h | 37 +++++++ {lib => include}/utils.h | 3 + lib/froaring_api/equal.h | 122 ---------------------- tests/CMakeLists.txt | 2 +- tests/integration_test.cpp | 18 +++- 19 files changed, 428 insertions(+), 250 deletions(-) create mode 100644 include/api.h rename {lib => include}/array_container.h (98%) rename {lib => include}/binsearch_index.h (76%) rename {lib => include}/bitmap_container.h (52%) rename {lib => include}/froaring.h (70%) create mode 100644 include/froaring_api/equal.h rename {lib => include}/froaring_api/reset.h (100%) rename {lib => include}/froaring_api/set.h (100%) rename {lib => include}/froaring_api/test.h (100%) rename {lib => include}/handle.h (95%) rename {lib => include}/prelude.h (92%) rename {lib => include}/rle_container.h (54%) create mode 100644 include/transform.h rename {lib => include}/utils.h (63%) delete mode 100644 lib/froaring_api/equal.h diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 611182e..d4b1c81 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -2,4 +2,4 @@ add_executable(example example.cpp) # Include directory for the example program -target_include_directories(example PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ../lib) +target_include_directories(example PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ../include) diff --git a/examples/example.cpp b/examples/example.cpp index bff15b0..3ec2515 100644 --- a/examples/example.cpp +++ b/examples/example.cpp @@ -33,6 +33,21 @@ int main() { cout << a.count() << endl; assert(a.count() == 4); + assert(a == a); + + froaring::FlexibleRoaringBitmap<> x, y; + x.set(1); + x.set(2); + x.set(3); + x.set(114514); + x.set(1919810); + x.reset(2); + y.set(1); + y.set(3); + y.set(114514); + y.set(1919810); + assert(x == y); + cout << (a == a) << endl; cout << sizeof(a) << endl; cout << sizeof(std::vector) << endl; diff --git a/include/api.h b/include/api.h new file mode 100644 index 0000000..ef5f0c4 --- /dev/null +++ b/include/api.h @@ -0,0 +1,3 @@ +#pragma once + +#include "froaring_api/equal.h" \ No newline at end of file diff --git a/lib/array_container.h b/include/array_container.h similarity index 98% rename from lib/array_container.h rename to include/array_container.h index 6b08b37..46b47bf 100644 --- a/lib/array_container.h +++ b/include/array_container.h @@ -24,7 +24,7 @@ class ArrayContainer : public froaring_container_t { } ArrayContainer(SizeType capacity = ARRAY_CONTAINER_INIT_CAPACITY, SizeType size = 0) - : capacity(capacity), + : capacity(std::max(capacity, size)), size(size), vals(static_cast(malloc(capacity * sizeof(IndexOrNumType)))) { assert(vals && "Failed to allocate memory for ArrayContainer"); diff --git a/lib/binsearch_index.h b/include/binsearch_index.h similarity index 76% rename from lib/binsearch_index.h rename to include/binsearch_index.h index 7addbbd..366cf30 100644 --- a/lib/binsearch_index.h +++ b/include/binsearch_index.h @@ -1,7 +1,8 @@ #pragma once -#include "froaring_api/equal.h" #include "handle.h" +#include "utils.h" +#include "transform.h" namespace froaring { @@ -28,6 +29,13 @@ class BinsearchIndex : public froaring_container_t { using CTy = froaring::ContainerType; using ContainerHandle = froaring::ContainerHandle; + /// Bit capacity for containers indexed + static constexpr size_t ContainerCapacity = (1 << DataBits); + /// Array threshold (Array will not be optimum for storing more + /// elements) + static constexpr size_t ArrayToBitmapCountThreshold = ContainerCapacity / DataBits; + /// RLE threshold (RLE will not be optimum for more runs) + static constexpr size_t RleToBitmapRunThreshold = ContainerCapacity / (DataBits * 2); public: BinsearchIndex(SizeType capacity = CONTAINERS_INIT_CAPACITY, SizeType size = 0) @@ -37,10 +45,29 @@ class BinsearchIndex : public froaring_container_t { assert(containers && "Failed to allocate memory for containers"); } + void debug_print() const { + for (SizeType i = 0; i < size; ++i) { + std::cout << "Index: " << containers[i].index << " Type: " << static_cast(containers[i].type) << std::endl; + switch (containers[i].type) { + case CTy::RLE: + CAST_TO_RLE(containers[i].ptr)->debug_print(); + break; + case CTy::Array: + CAST_TO_ARRAY(containers[i].ptr)->debug_print(); + break; + case CTy::Bitmap: + CAST_TO_BITMAP(containers[i].ptr)->debug_print(); + break; + default: + FROARING_UNREACHABLE + } + } + } + /// Return the entry position if found, otherwise the iterator points to the /// lower bound (to be inserted at). /// - SizeType getContainerPosByIndex(IndexType index) const { + SizeType lower_bound(IndexType index) const { SizeType left = 0; SizeType right = size; @@ -72,10 +99,9 @@ class BinsearchIndex : public froaring_container_t { can_fit_t data; num2index_n_data(value, index, data); - SizeType entry_pos = getContainerPosByIndex(index); + SizeType entry_pos = lower_bound(index); - if (entry_pos == size) return false; - if (containers[entry_pos].index != index) return false; + if (entry_pos == size || containers[entry_pos].index != index) return false; // Now we found the corresponding container switch (containers[entry_pos].type) { @@ -97,50 +123,41 @@ class BinsearchIndex : public froaring_container_t { can_fit_t data; num2index_n_data(value, index, data); - SizeType pos = getContainerPosByIndex(index); - - // Not found, insert a new container at the end: - if (pos == size) { - if (size == capacity) { - expand(); - } - - auto array_ptr = new ArraySized(ARRAY_CONTAINER_INIT_CAPACITY, 1); - array_ptr->vals[0] = data; - containers[size] = ContainerHandle(array_ptr, CTy::Array, index); - size++; - return; - } + SizeType pos = lower_bound(index); - // Not found, insert a new container in the middle: - if (pos < size && containers[pos].index != index) { + // Not found, insert a new container: + if (pos == size || containers[pos].index != index) { if (size == capacity) { expand(); } - - std::memmove(&containers[pos + 2], &containers[pos + 1], (size - pos - 1) * sizeof(ContainerHandle)); + std::memmove(&containers[pos + 1], &containers[pos], (size - pos) * sizeof(ContainerHandle)); auto array_ptr = new ArraySized(ARRAY_CONTAINER_INIT_CAPACITY, 1); array_ptr->vals[0] = data; - containers[pos + 1] = ContainerHandle(array_ptr, CTy::Array, index); + containers[pos] = ContainerHandle(array_ptr, CTy::Array, index); size++; return; } - ContainerHandle& entry = containers[pos]; - assert(entry.index == index && "??? Wrong container found or created"); - // Now we found the corresponding container - switch (entry.type) { + switch (containers[pos].type) { case CTy::RLE: { - CAST_TO_RLE(entry.ptr)->set(data); + CAST_TO_RLE(containers[pos].ptr)->set(data); break; } case CTy::Array: { - CAST_TO_ARRAY(entry.ptr)->set(data); + auto array_ptr = CAST_TO_ARRAY(containers[pos].ptr); + array_ptr->set(data); + // Transform into a bitmap container if it gets bigger + if (array_ptr->size >= ArrayToBitmapCountThreshold) { + auto new_bitmap = froaring_array_to_bitmap(array_ptr); + delete containers[pos].ptr; + containers[pos].ptr = new_bitmap; + containers[pos].type = CTy::Bitmap; + } break; } case CTy::Bitmap: { - CAST_TO_BITMAP(entry.ptr)->set(data); + CAST_TO_BITMAP(containers[pos].ptr)->set(data); break; } default: @@ -175,7 +192,7 @@ class BinsearchIndex : public froaring_container_t { can_fit_t data; num2index_n_data(value, index, data); - SizeType pos = getContainerPosByIndex(index); + SizeType pos = lower_bound(index); if (pos == size || containers[pos].index != index) { // not found: return directly return; } @@ -187,7 +204,7 @@ class BinsearchIndex : public froaring_container_t { switch (entry.type) { case CTy::RLE: { CAST_TO_RLE(entry.ptr)->reset(data); - if (CAST_TO_RLE(entry.ptr)->size == 0) { + if (CAST_TO_RLE(entry.ptr)->run_count == 0) { delete entry.ptr; if (size > 1) { std::memmove(&containers[pos], &containers[pos + 1], @@ -226,21 +243,6 @@ class BinsearchIndex : public froaring_container_t { } } - bool operator==(const BinsearchIndex& other) const { - if (size != other.size) return false; - for (SizeType i = 0; i < size; ++i) { // quick check - if (containers[i].index != other.containers[i].index) { - return false; - } - } - for (SizeType i = 0; i < size; ++i) { - auto res = froaring_equal(containers[i].ptr, other.containers[i].ptr, containers[i].type, - other.containers[i].type); - if (!res) return false; - } - return true; - } - // Release all containers ~BinsearchIndex() { diff --git a/lib/bitmap_container.h b/include/bitmap_container.h similarity index 52% rename from lib/bitmap_container.h rename to include/bitmap_container.h index bbe96ef..25007a4 100644 --- a/lib/bitmap_container.h +++ b/include/bitmap_container.h @@ -13,10 +13,11 @@ class BitmapContainer : public froaring_container_t { public: static constexpr size_t BitsPerWord = 8 * sizeof(WordType); static constexpr size_t TotalBits = (1 << DataBits); - static constexpr size_t WordCount = (TotalBits + BitsPerWord - 1) / BitsPerWord; // ceiling + static constexpr size_t WordsCount = (TotalBits + BitsPerWord - 1) / BitsPerWord; // ceiling using NumType = froaring::can_fit_t; using IndexType = froaring::can_fit_t; using SizeType = froaring::can_fit_t; + static constexpr WordType IndexInsideWordMask = (1 << cexpr_log2(BitsPerWord)) - 1; public: BitmapContainer() { memset(words, 0, sizeof(words)); } @@ -27,17 +28,17 @@ class BitmapContainer : public froaring_container_t { static void destroy(BitmapContainer* obj) { delete (obj); } void debug_print() const { - for (size_t i = 0; i < WordCount; ++i) { + for (size_t i = 0; i < WordsCount; ++i) { // std::cout << words[i] << " "; WordType w = words[i]; WordType t = w & (~w + 1); - WordType r = i * sizeof(WordType) + std::countr_zero(w); + auto r = i * sizeof(WordType) + std::countr_zero(w); std::cout << r << " "; } std::cout << std::endl; } - void clear() { std::memset(words.data(), 0, WordCount * sizeof(WordType)); } + void clear() { std::memset(words.data(), 0, WordsCount * sizeof(WordType)); } void set(NumType index) { words[index / BitsPerWord] |= ((WordType)1 << (index % BitsPerWord)); } @@ -52,6 +53,37 @@ class BitmapContainer : public froaring_container_t { void reset(NumType index) { words[index / BitsPerWord] &= ~((WordType)1 << (index % BitsPerWord)); } + bool containesRange(IndexType start, IndexType end) const { + if (start >= end) return true; + constexpr WordType low_bits_mask = (1 << BitsPerWord) - 1; + constexpr WordType full_1_mask = (1 << BitsPerWord) - 1; + const IndexType start_word = start / BitsPerWord; + const IndexType end_word = end / BitsPerWord; + + // All "1" from `start` to MSB + const WordType first_mask = ~(((SizeType)1 << (start & IndexInsideWordMask)) - 1); + // All "1" from LSB to `end` + const WordType last_mask = ((SizeType)1 << (end & IndexInsideWordMask)) - 1; + + if (start_word == end_word) { + return ((words[end_word] & first_mask & last_mask) == (first_mask & last_mask)); + } + + if (start_word >= WordsCount || (words[start_word] & first_mask) != first_mask) { + return false; + } + if (end_word >= WordsCount || (words[end_word] & last_mask) != last_mask) { + return false; + } + + for (IndexType i = start_word + 1; i < end_word; ++i) { + if (words[i] != full_1_mask) { + return false; + } + } + + return true; + } SizeType cardinality() const { SizeType count = 0; for (const auto& word : words) { @@ -61,6 +93,6 @@ class BitmapContainer : public froaring_container_t { } public: - WordType words[WordCount]; + WordType words[WordsCount]; }; } // namespace froaring \ No newline at end of file diff --git a/lib/froaring.h b/include/froaring.h similarity index 70% rename from lib/froaring.h rename to include/froaring.h index eb51c30..789c5e5 100644 --- a/lib/froaring.h +++ b/include/froaring.h @@ -11,6 +11,8 @@ #include "bitmap_container.h" #include "prelude.h" #include "rle_container.h" +#include "utils.h" +#include "api.h" namespace froaring { /// @brief A flexible Roaring bitmap consists with a binary-search-indexed @@ -23,14 +25,6 @@ namespace froaring { /// @tparam DataBits low bits to be stored in containers. template class FlexibleRoaringBitmap { - /// Bit capacity for containers indexed - static constexpr size_t ContainerCapacity = (1 << DataBits); - /// Array threshold (Array will not be optimum for storing more - /// elements) - static constexpr size_t ArrayToBitmapCountThreshold = ContainerCapacity / DataBits; - /// RLE threshold (RLE will not be optimum for more runs) - static constexpr size_t RleToBitmapRunThreshold = ContainerCapacity / (DataBits * 2); - /// The container type for the index layer. Editable. // TODO: Support more index layers with maybe different types (maybe by Curiously Recurring Template Pattern) using ContainersSized = BinsearchIndex; @@ -69,7 +63,32 @@ class FlexibleRoaringBitmap { FROARING_UNREACHABLE } } + + void debug_print() { + switch (handle.type) { + case CTy::Array: + CAST_TO_ARRAY(handle.ptr)->debug_print(); + break; + case CTy::Bitmap: + CAST_TO_BITMAP(handle.ptr)->debug_print(); + break; + case CTy::RLE: + CAST_TO_RLE(handle.ptr)->debug_print(); + break; + case CTy::Containers: + castToContainers(handle.ptr)->debug_print(); + break; + default: + FROARING_UNREACHABLE + } + } + void set(WordType num) { + if (handle.type == CTy::Containers) { + castToContainers(handle.ptr)->set(num); + return; + } + can_fit_t index; can_fit_t data; num2index_n_data(num, index, data); @@ -81,11 +100,12 @@ class FlexibleRoaringBitmap { CAST_TO_ARRAY(handle.ptr)->set(data); return; } - if (handle.type != CTy::Containers && handle.index != index) { // Single container, and is set: + if (handle.index != index) { // Single container, and is set: switchToContainers(); castToContainers(handle.ptr)->set(num); return; } + switch (handle.type) { case CTy::Array: CAST_TO_ARRAY(handle.ptr)->set(data); @@ -96,9 +116,6 @@ class FlexibleRoaringBitmap { case CTy::RLE: CAST_TO_RLE(handle.ptr)->set(data); break; - case CTy::Containers: - castToContainers(handle.ptr)->set(num); - break; default: FROARING_UNREACHABLE } @@ -109,11 +126,15 @@ class FlexibleRoaringBitmap { return false; } + if (handle.type == CTy::Containers) { + return castToContainers(handle.ptr)->test(num); + } + can_fit_t index; can_fit_t data; num2index_n_data(num, index, data); - if (handle.type != CTy::Containers && handle.index != index) { // Single container, and is set: + if (handle.index != index) { // Single container, and is set: return false; } switch (handle.type) { @@ -136,11 +157,15 @@ class FlexibleRoaringBitmap { if (!was_set()) { return; } + if (handle.type == CTy::Containers) { + castToContainers(handle.ptr)->reset(num); + return; + } can_fit_t index; can_fit_t data; num2index_n_data(num, index, data); - if (handle.type != CTy::Containers && handle.index != index) { // Single container, and is set: + if (handle.index != index) { // Single container, and is set: return; } @@ -183,8 +208,23 @@ class FlexibleRoaringBitmap { } bool operator==(const FlexibleRoaringBitmap& other) const { - // TODO: - return true; + if (!was_set()) { + return (other.count() == 0); + } + if (handle.type == CTy::Containers && other.handle.type == CTy::Containers) { + return froaring_equal_bsbs(castToContainers(handle.ptr), castToContainers(other.handle.ptr)); + } + if (handle.type == CTy::Containers) { // the other is not containers + const ContainerHandle& lhs = castToContainers(handle.ptr)->containers[0]; + const ContainerHandle& rhs = other.handle; + return froaring_equal(lhs.ptr, rhs.ptr, lhs.type, rhs.type); + } + if (other.handle.type == CTy::Containers) { // other is not containers + const ContainerHandle& lhs = castToContainers(other.handle.ptr)->containers[0]; + const ContainerHandle& rhs = other.handle; + return froaring_equal(lhs.ptr, rhs.ptr, lhs.type, rhs.type); + } + return froaring_equal(handle.ptr, other.handle.ptr, handle.type, other.handle.type); } /// @brief Called when the current container exceeds the block size: @@ -195,16 +235,18 @@ class FlexibleRoaringBitmap { ContainersSized* containers = new ContainersSized(CONTAINERS_INIT_CAPACITY, 1); // TODO: do not modify the containers directly containers->containers[0] = std::move(handle); - handle = ContainerHandle(containers, CTy::Containers, -1); } bool was_set() const { return flag & WAS_SET; } private: - inline ContainersSized* castToContainers(froaring_container_t* p) { return static_cast(p); } + ContainersSized* castToContainers(froaring_container_t* p) { return static_cast(p); } + froaring_container_t* castToFroaring(ContainersSized* p) { return static_cast(p); } + const ContainersSized* castToContainers(const froaring_container_t* p) { return static_cast(p); } + const froaring_container_t* castToFroaring(const ContainersSized* p) { return static_cast(p); } - inline const ContainersSized* castToContainers(const froaring_container_t* p) const { + const ContainersSized* castToContainers(const froaring_container_t* p) const { return static_cast(p); } diff --git a/include/froaring_api/equal.h b/include/froaring_api/equal.h new file mode 100644 index 0000000..37e99ce --- /dev/null +++ b/include/froaring_api/equal.h @@ -0,0 +1,143 @@ +#pragma once +// TODO: pointer to referece +// TODO: to const pointers +#include + +#include "utils.h" + +namespace froaring { +using CTy = froaring::ContainerType; +template +bool froaring_equal_bb(const BitmapContainer* a, const BitmapContainer* b) { + for (auto i = 0; i < a->WordsCount; ++i) { + if (a->words[i] != b->words[i]) return false; + } + return true; +} +template +bool froaring_equal_aa(const ArrayContainer* a, const ArrayContainer* b) { + if (a->size != b->size) return false; + for (auto i = 0; i < a->size; ++i) { + if (a->vals[i] != b->vals[i]) return false; + } + return true; +} +template +bool froaring_equal_rr(const RLEContainer* a, const RLEContainer* b) { + if (a->run_count != b->run_count) return false; + for (auto i = 0; i < a->run_count; ++i) { + if (a->runs[i].first != b->runs[i].first || a->runs[i].second != b->runs[i].second) return false; + } + return true; +} +template +bool froaring_equal_ar(const ArrayContainer* a, const RLEContainer* b) { + if (a->size > b->run_count) return false; + if (a->cardinality() != b->cardinality()) return false; + size_t pos = 0; + for (size_t i = 0; i < b->run_count; ++i) { + if (pos >= a->size) return false; + if (a->vals[pos] != b->runs[i].first) return false; + if (pos + (b->runs[i].second - b->runs[i].first) >= a->size) return false; + if (a->vals[pos + (b->runs[i].second - b->runs[i].first)] != b->runs[i].second) return false; + pos += (b->runs[i].second - b->runs[i].first) + 1; + } + return true; +} +template +bool froaring_equal_br(const BitmapContainer* a, const RLEContainer* b) { + // TODO: + return false; +} + +template +bool froaring_equal_ba(const BitmapContainer* a, const ArrayContainer* b) { + if (a->cardinality() != b->cardinality()) { + return false; + } + + typename BitmapContainer::SizeType pos = 0; + for (auto i = 0; i < a->WordsCount; ++i) { + WordType w = a->words[i]; + while (w != 0) { + if (pos >= b->cardinality()) { + return false; + } + WordType t = w & (~w + 1); + WordType r = i * sizeof(WordType) + std::countr_zero(w); + if (b->vals[pos] != r) { + return false; + } + ++pos; + w ^= t; + } + } + return (pos == b->cardinality()); +} + +template +bool froaring_equal_bsbs(const BinsearchIndex* a, + const BinsearchIndex* b) { + if (a->size != b->size) { + return false; + } + + for (size_t i = 0; i < a->size; ++i) { // quick check + if (a->containers[i].index != b->containers[i].index) { + return false; + } + } + for (size_t i = 0; i < a->size; ++i) { + auto res = + froaring_equal(a->containers[i].ptr, b->containers[i].ptr, a->containers[i].type, b->containers[i].type); + if (!res) { + return false; + } + } + return true; +} + +template +bool froaring_equal(const froaring_container_t* a, const froaring_container_t* b, CTy ta, CTy tb) { + switch (CTYPE_PAIR(ta, tb)) { + case CTYPE_PAIR(CTy::Bitmap, CTy::Bitmap): { + return froaring_equal_bb(CAST_TO_BITMAP_CONST(a), CAST_TO_BITMAP_CONST(b)); + break; + } + case CTYPE_PAIR(CTy::Array, CTy::Array): { + return froaring_equal_aa(CAST_TO_ARRAY_CONST(a), CAST_TO_ARRAY_CONST(b)); + break; + } + case CTYPE_PAIR(CTy::RLE, CTy::RLE): { + return froaring_equal_rr(CAST_TO_RLE_CONST(a), CAST_TO_RLE_CONST(b)); + break; + } + case CTYPE_PAIR(CTy::Bitmap, CTy::Array): { + return froaring_equal_ba(CAST_TO_BITMAP_CONST(a), CAST_TO_ARRAY_CONST(b)); + break; + } + case CTYPE_PAIR(CTy::Array, CTy::Bitmap): { + return froaring_equal_ba(CAST_TO_BITMAP_CONST(b), CAST_TO_ARRAY_CONST(a)); + break; + } + case CTYPE_PAIR(CTy::Bitmap, CTy::RLE): { + return froaring_equal_br(CAST_TO_BITMAP_CONST(a), CAST_TO_RLE_CONST(b)); + break; + } + case CTYPE_PAIR(CTy::RLE, CTy::Bitmap): { + return froaring_equal_br(CAST_TO_BITMAP_CONST(b), CAST_TO_RLE_CONST(a)); + break; + } + case CTYPE_PAIR(CTy::Array, CTy::RLE): { + return froaring_equal_ar(CAST_TO_ARRAY_CONST(a), CAST_TO_RLE_CONST(b)); + break; + } + case CTYPE_PAIR(CTy::RLE, CTy::Array): { + return froaring_equal_ar(CAST_TO_ARRAY_CONST(b), CAST_TO_RLE_CONST(a)); + break; + } + default: + FROARING_UNREACHABLE + } +} +} // namespace froaring \ No newline at end of file diff --git a/lib/froaring_api/reset.h b/include/froaring_api/reset.h similarity index 100% rename from lib/froaring_api/reset.h rename to include/froaring_api/reset.h diff --git a/lib/froaring_api/set.h b/include/froaring_api/set.h similarity index 100% rename from lib/froaring_api/set.h rename to include/froaring_api/set.h diff --git a/lib/froaring_api/test.h b/include/froaring_api/test.h similarity index 100% rename from lib/froaring_api/test.h rename to include/froaring_api/test.h diff --git a/lib/handle.h b/include/handle.h similarity index 95% rename from lib/handle.h rename to include/handle.h index 952e5fd..15f670c 100644 --- a/lib/handle.h +++ b/include/handle.h @@ -15,6 +15,7 @@ struct ContainerHandle { IndexType index = 0; // Index for this container public: ContainerHandle() = default; + ContainerHandle(ContainerHandle&&) = default; ContainerHandle& operator=(ContainerHandle&&) = default; ContainerHandle(const ContainerHandle&) = delete; diff --git a/lib/prelude.h b/include/prelude.h similarity index 92% rename from lib/prelude.h rename to include/prelude.h index 10a5917..f6b3dee 100644 --- a/lib/prelude.h +++ b/include/prelude.h @@ -2,6 +2,7 @@ #include #include +#include #define WAS_SET 0x1 #define FROARING_UNREACHABLE assert(false && "Should never reach here"); @@ -75,4 +76,12 @@ void num2index_n_data(can_fit_t value, can_fit_t> DataBits; } +/// @brief Get log2(x) floored at compile time. +/// @example cexpr_log2(8) = 3, cexpr_log2(9) = 3, cexpr_log2(16) = 4 +template +constexpr T cexpr_log2(T x) +{ + return x == 1 ? 0 : 1+cexpr_log2(x >> 1); +} + } // namespace froaring \ No newline at end of file diff --git a/lib/rle_container.h b/include/rle_container.h similarity index 54% rename from lib/rle_container.h rename to include/rle_container.h index 67a40cb..533bc2c 100644 --- a/lib/rle_container.h +++ b/include/rle_container.h @@ -20,8 +20,8 @@ class RLEContainer : public froaring_container_t { // [5,6,7,8,9,10] public: - RLEContainer(SizeType capacity = RLE_CONTAINER_INIT_CAPACITY, SizeType size = 0) - : capacity(capacity), size(size), runs(static_cast(malloc(capacity * sizeof(RunPair)))) { + RLEContainer(SizeType capacity = RLE_CONTAINER_INIT_CAPACITY, SizeType run_count = 0) + : capacity(std::max(capacity, run_count)), run_count(run_count), runs(static_cast(malloc(capacity * sizeof(RunPair)))) { assert(runs && "Failed to allocate memory for RLEContainer"); } @@ -31,65 +31,64 @@ class RLEContainer : public froaring_container_t { RLEContainer& operator=(const RLEContainer&) = delete; void debug_print() const { - for (size_t i = 0; i < size; ++i) { + for (size_t i = 0; i < run_count; ++i) { std::cout << "[" << int(runs[i].first) << "," << int(runs[i].second) << "] "; } std::cout << std::endl; } - void clear() { size = 0; } + void clear() { run_count = 0; } /// @brief Set a bit of the container. /// @param num The bit to set. void set(IndexType num) { - if (!size) { + if (!run_count) { runs[0] = {num, num}; - size = 1; + run_count = 1; return; } - auto pos = upper_bound(num); - if (pos < size && runs[pos].first <= num && num <= runs[pos].second) { + auto pos = lower_bound(num); + if (pos < run_count && runs[pos].first <= num && num <= runs[pos].second) { return; // already set, do nothing. } set_raw(pos, num); } void reset(IndexType num) { - if (!size) return; - auto pos = upper_bound(num); + if (!run_count) return; + auto pos = lower_bound(num); + if (pos == run_count || runs[pos].first > num || runs[pos].second < num) return; auto old_end = runs[pos].second; - if (runs[pos].first > num || runs[pos].second < num) return; - if (runs[pos].first == num && runs[pos].second == num) { // run is a single element, just remove it - if (pos < size) memmove(&runs[pos], &runs[pos + 1], (size - pos - 1) * sizeof(RunPair)); - --size; + if (pos < run_count) memmove(&runs[pos], &runs[pos + 1], (run_count - pos - 1) * sizeof(RunPair)); + --run_count; } else if (runs[pos].first == num) { runs[pos].first++; } else if (runs[pos].second == num) { runs[pos].second--; } else { // split the run [a,b] into: [a, num-1] and [num+1, b] - if (size == capacity) expand(); - if (pos < size) std::memmove(&runs[pos + 2], &runs[pos + 1], (size - pos - 1) * sizeof(RunPair)); - size++; + if (run_count == capacity) expand(); + std::memmove(&runs[pos + 1], &runs[pos], (run_count - pos) * sizeof(RunPair)); + run_count++; runs[pos].second = num - 1; - runs[pos + 1] = {num + 1, old_end}; + runs[pos + 1].first = num + 1; } } bool test(IndexType num) const { - if (!size) return false; - auto pos = upper_bound(num); - return (pos < size && runs[pos].first <= num && num <= runs[pos].second); + if (!run_count) return false; + auto pos = lower_bound(num); + return (pos < run_count && runs[pos].first <= num && num <= runs[pos].second); } bool test_and_set(IndexType num) { bool was_set; IndexType pos; - if (!size) { + if (!run_count) { was_set = false; } else { - pos = upper_bound(num); - was_set = (pos < size && runs[pos].first <= num && num <= runs[pos].second); + pos = lower_bound(num); + was_set = (pos < run_count && runs[pos].first <= num && num <= runs[pos].second); } if (was_set) return false; set_raw(pos, num); @@ -98,29 +97,29 @@ class RLEContainer : public froaring_container_t { SizeType cardinality() const { SizeType count = 0; - for (IndexType i = 0; i < size; ++i) { + for (IndexType i = 0; i < run_count; ++i) { count += runs[i].second - runs[i].first; } // We need to add 1 to the count because the range is inclusive - return count + size; + return count + run_count; } - IndexType runsCount() const { return size; } + IndexType runsCount() const { return run_count; } private: - SizeType upper_bound(IndexType num) const { - assert(size && "Cannot find upper bound in an empty container"); - SizeType low = 0; - SizeType high = size; - while (low < high) { - SizeType mid = low + (high - low) / 2; - if (runs[mid].first <= num) { - low = mid + 1; + SizeType lower_bound(IndexType num) const { + assert(run_count && "Cannot find lower bound in an empty container"); + SizeType left = 0; + SizeType right = run_count; + while (left < right) { + SizeType mid = left + (right - left) / 2; + if (runs[mid].second < num) { + left = mid + 1; } else { - high = mid; + right = mid; } } - return (low > 0) ? low - 1 : 0; + return left; } void expand() { @@ -133,37 +132,37 @@ class RLEContainer : public froaring_container_t { void set_raw(IndexType pos, IndexType num) { // If the value is next to the previous run's end (and need merging) - bool merge_prev = (num > 0 && num - 1 == runs[pos].second); + bool merge_prev = (pos > 0 && num > 0 && num - 1 == runs[pos - 1].second); // If the value is next to the next run's start (and need merging) - bool merge_next = (pos < size - 1 && runs[pos + 1].first > 0 && runs[pos + 1].first - 1 == num); + bool merge_next = (pos < run_count && runs[pos].first > 0 && runs[pos].first - 1 == num); if (merge_prev && merge_next) { // [a,num-1] + num + [num+1, b] - runs[pos].second = runs[pos + 1].second; - if (pos < size) std::memmove(&runs[pos + 1], &runs[pos + 2], (size - pos - 1) * sizeof(RunPair)); - size--; + runs[pos - 1].second = runs[pos].second; + if (pos < run_count) std::memmove(&runs[pos], &runs[pos + 1], (run_count - pos - 1) * sizeof(RunPair)); + run_count--; return; } - if (merge_prev) { // [a,num-1] + num - runs[pos].second++; + if (merge_prev) { // [a,num-1] + num -> [a, num] + runs[pos - 1].second++; return; } - if (merge_next) { // num + [num+1, b] - runs[pos + 1].first--; + if (merge_next) { // num + [num+1, b] -> [num, b] + runs[pos].first--; return; } - if (size == capacity) { + if (run_count == capacity) { expand(); } - if (pos < size) std::memmove(&runs[pos + 2], &runs[pos + 1], (size - pos - 1) * sizeof(RunPair)); - size++; - runs[pos + 1] = {num, num}; + if (pos < run_count) std::memmove(&runs[pos + 1], &runs[pos], (run_count - pos) * sizeof(RunPair)); + run_count++; + runs[pos] = {num, num}; return; } public: SizeType capacity; - IndexType size; // Always less than 2**(DataBits-1), so we do not need SizeType + IndexType run_count; // Always less than 2**(DataBits-1), so we do not need SizeType RunPair* runs; }; } // namespace froaring \ No newline at end of file diff --git a/include/transform.h b/include/transform.h new file mode 100644 index 0000000..b2aaec9 --- /dev/null +++ b/include/transform.h @@ -0,0 +1,37 @@ +#pragma once + +#include "array_container.h" +#include "bitmap_container.h" +#include "prelude.h" +#include "rle_container.h" + +namespace froaring { +template +ArrayContainer* froaring_bitmap_to_array(const BitmapContainer* c) { + // TODO: accelerate with SSE, AVX2 or AVX512 + auto cardinality = c->cardinality(); + auto ans = new ArrayContainer(cardinality, cardinality); + int outpos = 0; + typename ArrayContainer::IndexOrNumType base = 0; + for (size_t i = 0; i < c->WordsCount; ++i) { + WordType w = c->words[i]; + while (w != 0) { + WordType t = w & (~w + 1); + auto r = std::countr_zero(w); + ans->vals[outpos++] = (ArrayContainer::IndexOrNumType)(r + base); + w ^= t; + } + base += DataBits; + } + return ans; +} +template +BitmapContainer* froaring_array_to_bitmap(const ArrayContainer* c) { + auto ans = new BitmapContainer(); + auto size = c->cardinality(); + for (int i = 0; i < size; ++i) { + ans->set(c->vals[i]); + } + return ans; +} +}; // namespace froaring \ No newline at end of file diff --git a/lib/utils.h b/include/utils.h similarity index 63% rename from lib/utils.h rename to include/utils.h index e327dc2..78b155c 100644 --- a/lib/utils.h +++ b/include/utils.h @@ -9,4 +9,7 @@ #define CAST_TO_BITMAP(p) static_cast*>(p) #define CAST_TO_ARRAY(p) static_cast*>(p) #define CAST_TO_RLE(p) static_cast*>(p) +#define CAST_TO_BITMAP_CONST(p) static_cast*>(p) +#define CAST_TO_ARRAY_CONST(p) static_cast*>(p) +#define CAST_TO_RLE_CONST(p) static_cast*>(p) #define CTYPE_PAIR(t1, t2) (static_cast(t1) * 4 + static_cast(t2)) diff --git a/lib/froaring_api/equal.h b/lib/froaring_api/equal.h deleted file mode 100644 index 3c19b79..0000000 --- a/lib/froaring_api/equal.h +++ /dev/null @@ -1,122 +0,0 @@ -#pragma once - -#include - -#include "utils.h" - -namespace froaring { -using CTy = froaring::ContainerType; -template -bool froaring_equal_bb(BitmapContainer* a, - BitmapContainer* b) { - for (auto i = 0; i < BitmapContainer::WordCount; ++i) { - if (a->words[i] != b->words[i]) return false; - } - return true; -} -template -bool froaring_equal_aa(ArrayContainer* a, - ArrayContainer* b) { - if (a->size != b->size) return false; - for (auto i = 0; i < a->size; ++i) { - if (a->vals[i] != b->vals[i]) return false; - } - return true; -} -template -bool froaring_equal_rr(RLEContainer* a, - RLEContainer* b) { - if (a->size != b->size) return false; - for (auto i = 0; i < a->size; ++i) { - if (a->runs[i].first != b->runs[i].first || - a->runs[i].second != b->runs[i].second) - return false; - } - return true; -} -template -bool froaring_equal_ar(ArrayContainer* a, - RLEContainer* b) { - // TODO: - FROARING_NOT_IMPLEMENTED - return false; -} -template -bool froaring_equal_br(BitmapContainer* a, - RLEContainer* b) { - // TODO: - FROARING_NOT_IMPLEMENTED - return false; -} - -template -bool froaring_equal_ba(BitmapContainer* a, - ArrayContainer* b) { - if (a->cardinality() != b->cardinality()) { - return false; - } - - typename BitmapContainer::SizeType pos = 0; - for (auto i = 0; i < BitmapContainer::WordCount; ++i) { - WordType w = a->words[i]; - while (w != 0) { - if (pos >= b->cardinality()) { - return false; - } - WordType t = w & (~w + 1); - WordType r = i * sizeof(WordType) + std::countr_zero(w); - if (b->vals[pos] != r) { - return false; - } - ++pos; - w ^= t; - } - } - return (pos == b->cardinality()); -} - -template -bool froaring_equal(froaring_container_t* a, froaring_container_t* b, CTy ta, - CTy tb) { - switch (CTYPE_PAIR(ta, tb)) { - case CTYPE_PAIR(CTy::Bitmap, CTy::Bitmap): { - return froaring_equal_bb(CAST_TO_BITMAP(a), CAST_TO_BITMAP(b)); - break; - } - case CTYPE_PAIR(CTy::Array, CTy::Array): { - return froaring_equal_aa(CAST_TO_ARRAY(a), CAST_TO_ARRAY(b)); - break; - } - case CTYPE_PAIR(CTy::RLE, CTy::RLE): { - return froaring_equal_rr(CAST_TO_RLE(a), CAST_TO_RLE(b)); - break; - } - case CTYPE_PAIR(CTy::Bitmap, CTy::Array): { - return froaring_equal_ba(CAST_TO_BITMAP(a), CAST_TO_ARRAY(b)); - break; - } - case CTYPE_PAIR(CTy::Array, CTy::Bitmap): { - return froaring_equal_ba(CAST_TO_BITMAP(b), CAST_TO_ARRAY(a)); - break; - } - case CTYPE_PAIR(CTy::Bitmap, CTy::RLE): { - return froaring_equal_br(CAST_TO_BITMAP(a), CAST_TO_RLE(b)); - break; - } - case CTYPE_PAIR(CTy::RLE, CTy::Bitmap): { - return froaring_equal_br(CAST_TO_BITMAP(b), CAST_TO_RLE(a)); - break; - } - case CTYPE_PAIR(CTy::Array, CTy::RLE): { - return froaring_equal_ar(CAST_TO_ARRAY(a), CAST_TO_RLE(b)); - break; - } - case CTYPE_PAIR(CTy::RLE, CTy::Array): { - return froaring_equal_ar(CAST_TO_ARRAY(b), CAST_TO_RLE(a)); - break; - } - default: - FROARING_UNREACHABLE - } -} -} // namespace froaring \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 4f9cca2..1453eaf 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,7 +1,7 @@ # Add GTest find_package(GTest REQUIRED) include_directories(${GTEST_INCLUDE_DIRS}) -include_directories(${CMAKE_SOURCE_DIR}/lib) +include_directories(${CMAKE_SOURCE_DIR}/include) # Testing files add_executable(ArrayContainerTest test_array_container.cpp) diff --git a/tests/integration_test.cpp b/tests/integration_test.cpp index 253aa67..4221343 100644 --- a/tests/integration_test.cpp +++ b/tests/integration_test.cpp @@ -36,10 +36,24 @@ TEST(FlexibleRoaringBitmapTest, TypeConversion) { for (auto i = 0; i < 1000; i++) { bitmap.set(i); - bitmap.set(i+4090); + bitmap.set(i + 4090); } - + bitmap.debug_print(); EXPECT_EQ(bitmap.count(), 2000); + + for (auto i = 0; i < 1000; i++) { + bitmap.set(i); + bitmap.set(i + 4090); + } + + EXPECT_EQ(bitmap.count(), 2000); + + for (auto i = 0; i < 1000; i++) { + bitmap.reset(i); + bitmap.reset(i + 4090); + } + + EXPECT_EQ(bitmap.count(), 0); } int main(int argc, char **argv) {