From 3194adca2a8fe3b7d1e3f3c72b6cbf9d08e987c4 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 25 Aug 2022 17:16:48 -0400 Subject: [PATCH 001/162] Verifies fix on issue 382 (#383) * More portable code * Verifies fix on issue 382 * Corrected variable name. --- .github/workflows/ubuntu-debug-sani-ci.yml | 25 ++++++++++++++++++++++ src/array_util.c | 5 +++-- tests/cpp_unit.cpp | 16 ++++++++++++++ 3 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/ubuntu-debug-sani-ci.yml diff --git a/.github/workflows/ubuntu-debug-sani-ci.yml b/.github/workflows/ubuntu-debug-sani-ci.yml new file mode 100644 index 000000000..52848d873 --- /dev/null +++ b/.github/workflows/ubuntu-debug-sani-ci.yml @@ -0,0 +1,25 @@ +name: Ubuntu-Debug-Sanitized-CI + +'on': + - push + - pull_request + + +jobs: + ci: + name: ubuntu-gcc + runs-on: ubuntu-latest + + env: + CC: gcc + CXX: g++ + + steps: + - uses: actions/checkout@v2 + - name: Build and Test + run: | + mkdir build + cd build + cmake -DCMAKE_BUILD_TYPE=Debug -DROARING_SANITIZE=ON .. + cmake --build . + ctest . 
--output-on-failure diff --git a/src/array_util.c b/src/array_util.c index d01deae2a..48349105a 100644 --- a/src/array_util.c +++ b/src/array_util.c @@ -1912,8 +1912,9 @@ static inline bool _avx2_memequals(const void *s1, const void *s2, size_t n) { } while (ptr1 < end8) { - uint64_t v1 = *((const uint64_t*)ptr1); - uint64_t v2 = *((const uint64_t*)ptr2); + uint64_t v1, v2; + memcpy(&v1,ptr1,sizeof(uint64_t)); + memcpy(&v2,ptr2,sizeof(uint64_t)); if (v1 != v2) { return false; } diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp index 5c3de8fca..d0ba16e93 100644 --- a/tests/cpp_unit.cpp +++ b/tests/cpp_unit.cpp @@ -699,6 +699,22 @@ DEFINE_TEST(test_cpp_frozen) { const Roaring r2 = Roaring::frozenView(buf, num_bytes); assert_true(r1 == r2); + { + Roaring r; + r.addRange(0, 100000); + r.flip(90000, 91000); + r.runOptimize(); + + // allocate a buffer and serialize to it + size_t num_bytes1 = r.getFrozenSizeInBytes(); + char *buf1 = (char *)roaring_aligned_malloc(32, num_bytes1); + r.writeFrozen(buf1); + + // ensure the frozen bitmap is the same as the original + const Roaring rr = Roaring::frozenView(buf1, num_bytes1); + assert_true(r == rr); + roaring_aligned_free(buf1); + } #if ROARING_EXCEPTIONS // try viewing a misaligned/invalid buffer try { From 063a6c398b5778ca3e5e782d252102e4f77020a3 Mon Sep 17 00:00:00 2001 From: Soerian Lieve Date: Fri, 26 Aug 2022 04:08:37 +0100 Subject: [PATCH 002/162] Add add/removeRange to Roaring and Roaring64Map (#381) These were already present in the C API, but the C++ API had limited support. 
Co-authored-by: Daniel Lemire --- cpp/roaring.hh | 27 ++++- cpp/roaring64map.hh | 98 +++++++++++++++++ tests/cpp_random_unit.cpp | 9 +- tests/cpp_unit.cpp | 216 ++++++++++++++++++++++++++++++++++++++ tests/roaring_checked.hh | 31 ++++-- 5 files changed, 368 insertions(+), 13 deletions(-) diff --git a/cpp/roaring.hh b/cpp/roaring.hh index 8ee9e7b30..3853ae7b6 100644 --- a/cpp/roaring.hh +++ b/cpp/roaring.hh @@ -127,10 +127,17 @@ public: } /** - * Add all values from x (included) to y (excluded) + * Add all values in range [min, max) */ - void addRange(const uint64_t x, const uint64_t y) { - return api::roaring_bitmap_add_range(&roaring, x, y); + void addRange(const uint64_t min, const uint64_t max) { + return api::roaring_bitmap_add_range(&roaring, min, max); + } + + /** + * Add all values in range [min, max] + */ + void addRangeClosed(const uint32_t min, const uint32_t max) { + return api::roaring_bitmap_add_range_closed(&roaring, min, max); } /** @@ -154,6 +161,20 @@ public: return api::roaring_bitmap_remove_checked(&roaring, x); } + /** + * Remove all values in range [min, max) + */ + void removeRange(uint64_t min, uint64_t max) { + return api::roaring_bitmap_remove_range(&roaring, min, max); + } + + /** + * Remove all values in range [min, max] + */ + void removeRangeClosed(uint32_t min, uint32_t max) { + return api::roaring_bitmap_remove_range_closed(&roaring, min, max); + } + /** * Return the largest value (if not empty) */ diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh index 6ec9ccdff..383271b05 100644 --- a/cpp/roaring64map.hh +++ b/cpp/roaring64map.hh @@ -118,6 +118,52 @@ public: return result; } + /** + * Add all values in range [min, max) + */ + void addRange(uint64_t min, uint64_t max) { + if (min >= max) { + return; + } + addRangeClosed(min, max - 1); + } + + /** + * Add all values in range [min, max] + */ + void addRangeClosed(uint32_t min, uint32_t max) { + roarings[0].addRangeClosed(min, max); + } + void addRangeClosed(uint64_t min, uint64_t 
max) { + if (min > max) { + return; + } + uint32_t start_high = highBytes(min); + uint32_t start_low = lowBytes(min); + uint32_t end_high = highBytes(max); + uint32_t end_low = lowBytes(max); + if (start_high == end_high) { + roarings[start_high].addRangeClosed(start_low, end_low); + roarings[start_high].setCopyOnWrite(copyOnWrite); + return; + } + // we put std::numeric_limits<>::max/min in parenthesis to avoid a clash + // with the Windows.h header under Windows + roarings[start_high].addRangeClosed( + start_low, (std::numeric_limits::max)()); + roarings[start_high].setCopyOnWrite(copyOnWrite); + start_high++; + for (; start_high < end_high; ++start_high) { + roarings[start_high].addRangeClosed( + (std::numeric_limits::min)(), + (std::numeric_limits::max)()); + roarings[start_high].setCopyOnWrite(copyOnWrite); + } + roarings[end_high].addRangeClosed( + (std::numeric_limits::min)(), end_low); + roarings[end_high].setCopyOnWrite(copyOnWrite); + } + /** * Add value n_args from pointer vals */ @@ -158,6 +204,58 @@ public: return false; } + /** + * Remove all values in range [min, max) + */ + void removeRange(uint64_t min, uint64_t max) { + if (min >= max) { + return; + } + return removeRangeClosed(min, max - 1); + } + + /** + * Remove all values in range [min, max] + */ + void removeRangeClosed(uint32_t min, uint32_t max) { + return roarings[0].removeRangeClosed(min, max); + } + void removeRangeClosed(uint64_t min, uint64_t max) { + if (min > max) { + return; + } + uint32_t start_high = highBytes(min); + uint32_t start_low = lowBytes(min); + uint32_t end_high = highBytes(max); + uint32_t end_low = lowBytes(max); + + if (roarings.empty() || end_high < roarings.cbegin()->first || + start_high > (roarings.crbegin())->first) { + return; + } + + auto start_iter = roarings.lower_bound(start_high); + auto end_iter = roarings.lower_bound(end_high); + if (start_iter->first == start_high) { + if (start_iter == end_iter) { + start_iter->second.removeRangeClosed(start_low, 
end_low); + return; + } + // we put std::numeric_limits<>::max/min in parenthesis + // to avoid a clash with the Windows.h header under Windows + start_iter->second.removeRangeClosed( + start_low, (std::numeric_limits::max)()); + start_iter++; + } + + roarings.erase(start_iter, end_iter); + + if (end_iter != roarings.cend() && end_iter->first == end_high) { + end_iter->second.removeRangeClosed( + (std::numeric_limits::min)(), end_low); + } + } + /** * Clear the bitmap */ diff --git a/tests/cpp_random_unit.cpp b/tests/cpp_random_unit.cpp index 21749133e..37000ebf5 100644 --- a/tests/cpp_random_unit.cpp +++ b/tests/cpp_random_unit.cpp @@ -57,7 +57,7 @@ Roaring make_random_bitset() { Roaring r; int num_ops = rand() % 100; for (int i = 0; i < num_ops; ++i) { - switch (rand() % 4) { + switch (rand() % 5) { case 0: r.add(gravity); break; @@ -68,11 +68,16 @@ Roaring make_random_bitset() { break; } case 2: { + uint32_t start = gravity + (rand() % 10) - 5; + r.removeRange(start, start + rand() % 5); + break; } + + case 3: { uint32_t start = gravity + (rand() % 50) - 25; r.flip(start, rand() % 50); break; } - case 3: { // tests remove(), select(), rank() + case 4: { // tests remove(), select(), rank() uint32_t card = r.cardinality(); if (card != 0) { uint32_t rnk = rand() % card; diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp index d0ba16e93..09953d9d2 100644 --- a/tests/cpp_unit.cpp +++ b/tests/cpp_unit.cpp @@ -3,12 +3,15 @@ */ #include +#include #include #include // access to pure C exported API for testing #include #include #include #include +#include + #include #include @@ -613,6 +616,215 @@ DEFINE_TEST(test_cpp_add_remove_checked_64) { assert_true(roaring.isEmpty()); } +DEFINE_TEST(test_cpp_add_range) { + std::vector> ranges = { + {1, 5}, + {1, 1}, + {2, 1}, + }; + for (const auto &range : ranges) { + uint64_t min = range.first; + uint64_t max = range.second; + Roaring r1; + r1.addRangeClosed(min, max); + Roaring r2; + for (uint64_t v = min; v <= max; ++v) { + 
r2.add(v); + } + assert_true(r1 == r2); + } +} + +DEFINE_TEST(test_cpp_remove_range) { + { + // min < r1.minimum, max > r1.maximum + Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4); + r1.removeRangeClosed(0, 5); + assert_true(r1.isEmpty()); + } + { + // min < r1.minimum, max < r1.maximum, max does not exactly match an + // element + Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4); + r1.removeRangeClosed(0, 3); + Roaring r2 = Roaring::bitmapOf(1, 4); + assert_true(r1 == r2); + } + { + // min < r1.minimum, max < r1.maximum, max exactly matches an element + Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4); + r1.removeRangeClosed(0, 2); + Roaring r2 = Roaring::bitmapOf(1, 4); + assert_true(r1 == r2); + } + { + // min > r1.minimum, max > r1.maximum, min does not exactly match an + // element + Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4); + r1.removeRangeClosed(3, 5); + Roaring r2 = Roaring::bitmapOf(2, 1, 2); + assert_true(r1 == r2); + } + { + // min > r1.minimum, max > r1.maximum, min exactly matches an element + Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4); + r1.removeRangeClosed(2, 5); + Roaring r2 = Roaring::bitmapOf(1, 1); + assert_true(r1 == r2); + } + { + // min > r1.minimum, max < r1.maximum, no elements between min and max + Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4); + r1.removeRangeClosed(3, 3); + Roaring r2 = Roaring::bitmapOf(3, 1, 2, 4); + assert_true(r1 == r2); + } + { + // max < r1.minimum + Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4); + r1.removeRangeClosed(0, 0); + Roaring r2 = Roaring::bitmapOf(3, 1, 2, 4); + assert_true(r1 == r2); + } + { + // min > r1.maximum + Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4); + r1.removeRangeClosed(5, 6); + Roaring r2 = Roaring::bitmapOf(3, 1, 2, 4); + assert_true(r1 == r2); + } + { + // min > max + Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4); + r1.removeRangeClosed(2, 1); + Roaring r2 = Roaring::bitmapOf(3, 1, 2, 4); + assert_true(r1 == r2); + } +} + +DEFINE_TEST(test_cpp_add_range_64) { + { + // 32-bit integers + Roaring64Map r1; + 
r1.addRangeClosed(uint32_t(1), uint32_t(5)); + Roaring64Map r2; + for (uint32_t v = 1; v <= 5; ++v) { + r2.add(v); + } + assert_true(r1 == r2); + } + std::vector> ranges = { + {uint64_t(1) << 32, (uint64_t(1) << 32) + 10}, + {(uint64_t(1) << 32) - 10, (uint64_t(1) << 32) + 10}, + {(uint64_t(1) << 32) + 2, (uint64_t(1) << 32) - 2}}; + for (const auto &range : ranges) { + uint64_t min = range.first; + uint64_t max = range.second; + Roaring64Map r1; + r1.addRangeClosed(min, max); + Roaring64Map r2; + for (uint64_t v = min; v <= max; ++v) { + r2.add(v); + } + assert_true(r1 == r2); + } +} + +DEFINE_TEST(test_cpp_remove_range_64) { + { + // 32-bit integers + Roaring64Map r1 = + Roaring64Map::bitmapOf(3, uint64_t(1), uint64_t(2), uint64_t(4)); + r1.removeRangeClosed(uint32_t(2), uint32_t(3)); + Roaring64Map r2 = Roaring64Map::bitmapOf(2, uint64_t(1), uint64_t(4)); + assert_true(r1 == r2); + } + { + // min < r1.minimum, max > r1.maximum + Roaring64Map r1 = Roaring64Map::bitmapOf( + 3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32); + r1.removeRangeClosed(uint64_t(0), uint64_t(5) << 32); + assert_true(r1.isEmpty()); + } + { + // min < r1.minimum, max < r1.maximum, max does not exactly match an + // element + Roaring64Map r1 = Roaring64Map::bitmapOf( + 3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32); + r1.removeRangeClosed(uint64_t(0), uint64_t(3) << 32); + Roaring64Map r2 = Roaring64Map::bitmapOf(1, uint64_t(4) << 32); + assert_true(r1 == r2); + } + { + // min < r1.minimum, max < r1.maximum, max exactly matches the high bits + // of an element + Roaring64Map r1 = + Roaring64Map::bitmapOf(4, uint64_t(1) << 32, uint64_t(2) << 32, + (uint64_t(2) << 32) + 1, uint64_t(4) << 32); + r1.removeRangeClosed(uint64_t(0), uint64_t(2) << 32); + Roaring64Map r2 = Roaring64Map::bitmapOf(2, (uint64_t(2) << 32) + 1, + uint64_t(4) << 32); + assert_true(r1 == r2); + } + { + // min > r1.minimum, max > r1.maximum, min does not exactly match an + // element + Roaring64Map 
r1 = Roaring64Map::bitmapOf( + 3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32); + r1.removeRangeClosed(uint64_t(3) << 32, uint64_t(5) << 32); + Roaring64Map r2 = + Roaring64Map::bitmapOf(2, uint64_t(1) << 32, uint64_t(2) << 32); + assert_true(r1 == r2); + } + { + // min > r1.minimum, max > r1.maximum, min exactly matches the high bits + // of an element + Roaring64Map r1 = + Roaring64Map::bitmapOf(4, uint64_t(1) << 32, uint64_t(2) << 32, + (uint64_t(2) << 32) + 1, uint64_t(4) << 32); + r1.removeRangeClosed((uint64_t(2) << 32) + 1, uint64_t(5) << 32); + Roaring64Map r2 = + Roaring64Map::bitmapOf(2, uint64_t(1) << 32, uint64_t(2) << 32); + assert_true(r1 == r2); + } + { + // min > r1.minimum, max < r1.maximum, no elements between min and max + Roaring64Map r1 = Roaring64Map::bitmapOf( + 3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32); + r1.removeRangeClosed(uint64_t(3) << 32, (uint64_t(3) << 32) + 1); + Roaring64Map r2 = Roaring64Map::bitmapOf( + 3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32); + assert_true(r1 == r2); + } + { + // max < r1.minimum + Roaring64Map r1 = Roaring64Map::bitmapOf( + 3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32); + r1.removeRangeClosed(uint64_t(1), uint64_t(2)); + Roaring64Map r2 = Roaring64Map::bitmapOf( + 3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32); + assert_true(r1 == r2); + } + { + // min > r1.maximum + Roaring64Map r1 = Roaring64Map::bitmapOf( + 3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32); + r1.removeRangeClosed(uint64_t(5) << 32, uint64_t(6) << 32); + Roaring64Map r2 = Roaring64Map::bitmapOf( + 3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32); + assert_true(r1 == r2); + } + { + // min > max + Roaring64Map r1 = Roaring64Map::bitmapOf( + 3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32); + r1.removeRangeClosed(uint64_t(2) << 32, uint64_t(1) << 32); + Roaring64Map r2 = Roaring64Map::bitmapOf( + 3, uint64_t(1) << 32, uint64_t(2) << 32, 
uint64_t(4) << 32); + assert_true(r1 == r2); + } +} + DEFINE_TEST(test_cpp_clear_64) { Roaring64Map roaring; @@ -920,6 +1132,10 @@ int main() { cmocka_unit_test(test_example_cpp_64_false), cmocka_unit_test(test_cpp_add_remove_checked), cmocka_unit_test(test_cpp_add_remove_checked_64), + cmocka_unit_test(test_cpp_add_range), + cmocka_unit_test(test_cpp_remove_range), + cmocka_unit_test(test_cpp_add_range_64), + cmocka_unit_test(test_cpp_remove_range_64), cmocka_unit_test(test_run_compression_cpp_64_true), cmocka_unit_test(test_run_compression_cpp_64_false), cmocka_unit_test(test_run_compression_cpp_true), diff --git a/tests/roaring_checked.hh b/tests/roaring_checked.hh index 9c7da3099..b06320199 100644 --- a/tests/roaring_checked.hh +++ b/tests/roaring_checked.hh @@ -117,15 +117,17 @@ class Roaring { return ans; } - void addRange(const uint64_t x, const uint64_t y) { - plain.addRange(x, y); + void addRange(const uint64_t x, const uint64_t y) { if (x != y) { // repeat add_range_closed() cast and bounding logic - uint32_t min = static_cast(x); - uint32_t max = static_cast(y - 1); - if (min <= max) { - for (uint32_t val = max; val != min - 1; --val) - check.insert(val); - } + addRangeClosed(x, y - 1); + } + } + + void addRangeClosed(uint32_t min, uint32_t max) { + plain.addRangeClosed(min, max); + if (min <= max) { + for (uint32_t val = max; val != min - 1; --val) + check.insert(val); } } @@ -148,6 +150,19 @@ class Roaring { return ans; } + void removeRange(const uint64_t x, const uint64_t y) { + if (x != y) { // repeat remove_range_closed() cast and bounding logic + removeRangeClosed(x, y - 1); + } + } + + void removeRangeClosed(uint32_t min, uint32_t max) { + plain.removeRangeClosed(min, max); + if (min <= max) { + check.erase(check.lower_bound(min), check.upper_bound(max)); + } + } + uint32_t maximum() const { uint32_t ans = plain.maximum(); assert(check.empty() ? 
ans == 0 : ans == *check.rbegin()); From efcb83dcdf332f02cde058a48574f5b7b14f73fb Mon Sep 17 00:00:00 2001 From: Soerian Lieve Date: Fri, 26 Aug 2022 18:15:10 +0100 Subject: [PATCH 003/162] Check against maxbytes to read the size of the map (#384) Previously, readSafe did not check whether it could read the map size in the allotted maxbytes. In addition, it did not subtract the map size from maxbytes when reading. This fixes the above and adds some deserialization tests using binary files. The valid files were produced with the script in tests/cpp_unit_util.cpp. The invalid files were created manually. --- cpp/roaring64map.hh | 10 ++-- tests/cpp_unit.cpp | 74 ++++++++++++++++++++++++++- tests/cpp_unit_util.cpp | 49 ++++++++++++++++++ tests/testdata/64map32bitvals.bin | Bin 0 -> 48 bytes tests/testdata/64mapempty.bin | Bin 0 -> 8 bytes tests/testdata/64mapemptyinput.bin | 0 tests/testdata/64maphighvals.bin | Bin 0 -> 470 bytes tests/testdata/64mapinvalidsize.bin | Bin 0 -> 9 bytes tests/testdata/64mapkeytoosmall.bin | Bin 0 -> 11 bytes tests/testdata/64mapsizetoosmall.bin | Bin 0 -> 7 bytes tests/testdata/64mapspreadvals.bin | Bin 0 -> 408 bytes 11 files changed, 125 insertions(+), 8 deletions(-) create mode 100644 tests/cpp_unit_util.cpp create mode 100644 tests/testdata/64map32bitvals.bin create mode 100644 tests/testdata/64mapempty.bin create mode 100644 tests/testdata/64mapemptyinput.bin create mode 100644 tests/testdata/64maphighvals.bin create mode 100644 tests/testdata/64mapinvalidsize.bin create mode 100644 tests/testdata/64mapkeytoosmall.bin create mode 100644 tests/testdata/64mapsizetoosmall.bin create mode 100644 tests/testdata/64mapspreadvals.bin diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh index 383271b05..65596f838 100644 --- a/cpp/roaring64map.hh +++ b/cpp/roaring64map.hh @@ -730,19 +730,17 @@ public: * space compared to the portable format (e.g., for very sparse bitmaps). 
*/ static Roaring64Map readSafe(const char *buf, size_t maxbytes) { + if (maxbytes < sizeof(uint64_t)) { + ROARING_TERMINATE("ran out of bytes"); + } Roaring64Map result; - // get map size uint64_t map_size; std::memcpy(&map_size, buf, sizeof(uint64_t)); buf += sizeof(uint64_t); + maxbytes -= sizeof(uint64_t); for (uint64_t lcv = 0; lcv < map_size; lcv++) { - // get map key if(maxbytes < sizeof(uint32_t)) { -#if ROARING_EXCEPTIONS - throw std::runtime_error("ran out of bytes"); -#else ROARING_TERMINATE("ran out of bytes"); -#endif } uint32_t key; std::memcpy(&key, buf, sizeof(uint32_t)); diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp index 09953d9d2..f7be3c922 100644 --- a/tests/cpp_unit.cpp +++ b/tests/cpp_unit.cpp @@ -13,9 +13,12 @@ #include +#include #include #include +#include +#include "config.h" #include "roaring.hh" using roaring::Roaring; // the C++ wrapper class @@ -1036,8 +1039,6 @@ DEFINE_TEST(test_cpp_flip) { Roaring r1 = Roaring::bitmapOf(3, 1, 3, 6); r1.flip(2, 5); Roaring r2 = Roaring::bitmapOf(4, 1, 2, 4, 6); - r1.printf(); - r2.printf(); assert_true(r1 == r2); } { @@ -1120,6 +1121,65 @@ DEFINE_TEST(test_cpp_flip_64) { } } +// Returns true on success, false on exception. +bool test64Deserialize(const std::string& filename) { + std::ifstream in(TEST_DATA_DIR + filename, std::ios::binary); + std::vector buf1(std::istreambuf_iterator(in), {}); + printf("Reading %lu bytes\n", buf1.size()); + Roaring64Map roaring; +#if ROARING_EXCEPTIONS + try { + roaring = Roaring64Map::readSafe(buf1.data(), buf1.size()); + } catch (...) { + return false; + } +#else + roaring = Roaring64Map::readSafe(buf1.data(), buf1.size()); +#endif + std::vector buf2(roaring.getSizeInBytes()); + assert_true(buf1.size() == buf2.size()); + assert_true(roaring.write(buf2.data()) == buf2.size()); + for (size_t i = 0; i < buf1.size(); ++i) { + assert_true(buf1[i] == buf2[i]); + } + return true; +} + +// The valid files were created with cpp_unit_util.cpp. 
+DEFINE_TEST(test_cpp_deserialize_64_empty) { + assert_true(test64Deserialize("64mapempty.bin")); +} + +DEFINE_TEST(test_cpp_deserialize_64_32bit_vals) { + assert_true(test64Deserialize("64map32bitvals.bin")); +} + +DEFINE_TEST(test_cpp_deserialize_64_spread_vals) { + assert_true(test64Deserialize("64mapspreadvals.bin")); +} + +DEFINE_TEST(test_cpp_deserialize_64_high_vals) { + assert_true(test64Deserialize("64maphighvals.bin")); +} + +#if ROARING_EXCEPTIONS +DEFINE_TEST(test_cpp_deserialize_64_empty_input) { + assert_false(test64Deserialize("64mapemptyinput.bin")); +} + +DEFINE_TEST(test_cpp_deserialize_64_size_too_small) { + assert_false(test64Deserialize("64mapsizetoosmall.bin")); +} + +DEFINE_TEST(test_cpp_deserialize_64_invalid_size) { + assert_false(test64Deserialize("64mapinvalidsize.bin")); +} + +DEFINE_TEST(test_cpp_deserialize_64_key_too_small) { + assert_false(test64Deserialize("64mapkeytoosmall.bin")); +} +#endif + int main() { roaring::misc::tellmeall(); const struct CMUnitTest tests[] = { @@ -1148,6 +1208,16 @@ int main() { cmocka_unit_test(test_cpp_frozen_64), cmocka_unit_test(test_cpp_flip), cmocka_unit_test(test_cpp_flip_64), + cmocka_unit_test(test_cpp_deserialize_64_empty), + cmocka_unit_test(test_cpp_deserialize_64_32bit_vals), + cmocka_unit_test(test_cpp_deserialize_64_spread_vals), + cmocka_unit_test(test_cpp_deserialize_64_high_vals), +#if ROARING_EXCEPTIONS + cmocka_unit_test(test_cpp_deserialize_64_empty_input), + cmocka_unit_test(test_cpp_deserialize_64_size_too_small), + cmocka_unit_test(test_cpp_deserialize_64_invalid_size), + cmocka_unit_test(test_cpp_deserialize_64_key_too_small), +#endif cmocka_unit_test(issue316), cmocka_unit_test(test_issue304), cmocka_unit_test(issue_336), diff --git a/tests/cpp_unit_util.cpp b/tests/cpp_unit_util.cpp new file mode 100644 index 000000000..7a0b0b553 --- /dev/null +++ b/tests/cpp_unit_util.cpp @@ -0,0 +1,49 @@ +#include +#include + +#include "roaring.hh" +#include "roaring64map.hh" + +using namespace 
roaring; + +void writeToFile(const Roaring64Map& roaring, const std::string& filename) { + std::vector buf(roaring.getSizeInBytes()); + roaring.write(buf.data()); + std::ofstream out(filename, std::ios::binary); + out.write(buf.data(), buf.size()); +} + +// Utility to create files with valid serialized Roaring64Maps. +int main() { + { + Roaring64Map roaring; + writeToFile(roaring, "64mapempty.bin"); + } + { + Roaring64Map roaring; + for (uint32_t v = 0; v < 10; ++v) { + roaring.add(v); + } + writeToFile(roaring, "64map32bitvals.bin"); + } + { + Roaring64Map roaring; + for (uint64_t high = 0; high < 10; ++high) { + for (uint64_t low = 0; low < 10; ++low) { + roaring.add((high << 32) + low); + } + } + writeToFile(roaring, "64mapspreadvals.bin"); + } + { + Roaring64Map roaring; + uint64_t max32 = (std::numeric_limits::max)(); + for (uint64_t high = max32 - 10; high <= max32; ++high) { + for (uint64_t low = max32 - 10; low <= max32; ++low) { + roaring.add((high << 32) + low); + } + } + writeToFile(roaring, "64maphighvals.bin"); + } + return EXIT_SUCCESS; +} diff --git a/tests/testdata/64map32bitvals.bin b/tests/testdata/64map32bitvals.bin new file mode 100644 index 0000000000000000000000000000000000000000..475b894417e44cff61d8810057fc1530cef05718 GIT binary patch literal 48 ocmZQ%KmaQP1_nkjmy9 literal 0 HcmV?d00001 diff --git a/tests/testdata/64mapemptyinput.bin b/tests/testdata/64mapemptyinput.bin new file mode 100644 index 000000000..e69de29bb diff --git a/tests/testdata/64maphighvals.bin b/tests/testdata/64maphighvals.bin new file mode 100644 index 0000000000000000000000000000000000000000..54abac71f271227c74a16f26ca7b0b0eae55bd8a GIT binary patch literal 470 zcma*jyA8k~6hqNROPY*A&HgL#hupj<3)qci=~TY#PXTf=3qB#*?OmueI)lk#b9h`; aSvFaASq@oFSuR;_Ssqz_pK8xL6y*yyRW=F$ literal 0 HcmV?d00001 diff --git a/tests/testdata/64mapinvalidsize.bin b/tests/testdata/64mapinvalidsize.bin new file mode 100644 index 
0000000000000000000000000000000000000000..48a2754f69fcd1d54f423ee5b8bae5e2f8b0b91b GIT binary patch literal 9 McmZQz00TxY000#L3;+NC literal 0 HcmV?d00001 diff --git a/tests/testdata/64mapkeytoosmall.bin b/tests/testdata/64mapkeytoosmall.bin new file mode 100644 index 0000000000000000000000000000000000000000..3a768cf467e9a5e39ca199a3cd093a52a59490f1 GIT binary patch literal 11 OcmZQz00Tw_MlJvV8vqRe literal 0 HcmV?d00001 diff --git a/tests/testdata/64mapsizetoosmall.bin b/tests/testdata/64mapsizetoosmall.bin new file mode 100644 index 0000000000000000000000000000000000000000..cd2112d98f7d268da7add022b82f0eac791bb279 GIT binary patch literal 7 McmZQz00IFn001!n8vpS*SERgUMnOxf@r{zi~~v PF5QrBO1Grj(uH&%!J7vn literal 0 HcmV?d00001 From 693997497367645268568c4aa6fdfb05039c8f48 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Fri, 26 Aug 2022 16:25:41 -0400 Subject: [PATCH 004/162] Introduce `roaring_bitmap_*_bulk` operations (#363) * implement bitmap contains multi * typo * fix commit and repair implementations * fix check result after call ra_advance, check the high 16 bits instean. * resolve comment and add document * add unit test for contains multi * add benchmark for contains_multi * fix unittest * fix unittest * fix unittest * add const to array length * fix unittest * add static inline declaration * remove declaration in .c * update codes via comments * Applying various fixes. * Add roaring_bitmap_add_bulk roaring_bitmap_add_bulk is a generalization of roaring_bitmap_add_many, caching the container for the last inserted item, and avoiding lookiing the container up if another item is inserted in the same container. 
Use the new function in the implementation of roaring_bitmap_add_many and roaring_bitmap_of * Add a test to add in bulk * Allow `roaring_bitmap_add_many` to be used with an unaligned ptr * Use the correct type for the container pointer in the bulk context struct * TMP: trying something * Fix RDTSC_FINAL for CLOCK_THREAD_CPUTIME_ID * Add a benchmark for add_bulk * clang-format * Don't load the whole context * Reorder tests * Improvements based on assembly output * Inline * Go back to using pointers into context * Add docs for optimization * Check the removals in the unit test * clang-format * Be smarter about benchmark clocks * Remove initialized bool * Posix should always have CLOCK_REALTIME * Posix is a lie * Implement a bulk contains function * Be more fair to add_many, don't count time building the array * Remove roaring_bitmap_contains_multi, use roaring_bitmap_contains_bulk * Actually run bulk add unit test * Fix incorrect behavior of roaring_bitmap_contains_bulk * Fix compilation as c++ * Add extra logging for error only on windows * Check if tests are being built with NDEBUG * Use cmocka's `assert_true`, which is always evaluated * Add more documentation to the `roaring_bulk_context_t` type Co-authored-by: arthur Co-authored-by: Daniel Lemire --- benchmarks/CMakeLists.txt | 1 + benchmarks/add_benchmark.c | 24 ++++- benchmarks/benchmark.h | 72 +++++--------- benchmarks/containsmulti_benchmark.c | 121 ++++++++++++++++++++++++ include/roaring/roaring.h | 59 +++++++++++- include/roaring/roaring_array.h | 4 +- src/roaring.c | 134 +++++++++++++++++++-------- src/roaring_array.c | 5 +- tests/toplevel_unit.c | 77 ++++++++++++++- 9 files changed, 394 insertions(+), 103 deletions(-) create mode 100644 benchmarks/containsmulti_benchmark.c diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 0d5fabcaa..4a3eb0e8e 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -12,6 +12,7 @@ if(NOT WIN32) 
add_c_benchmark(intersect_range_benchmark) target_link_libraries(add_benchmark m) add_c_benchmark(frozen_benchmark) + add_c_benchmark(containsmulti_benchmark) endif() add_c_benchmark(bitset_container_benchmark) add_c_benchmark(array_container_benchmark) diff --git a/benchmarks/add_benchmark.c b/benchmarks/add_benchmark.c index cc143e3dd..574b285c8 100644 --- a/benchmarks/add_benchmark.c +++ b/benchmarks/add_benchmark.c @@ -96,15 +96,33 @@ void run_test(uint32_t spanlen, uint32_t intvlen, double density, order_t order) printf(" %6.1f\n", array_min(results, num_passes)); printf(" roaring_bitmap_add_many():"); + for (int p = 0; p < num_passes; p++) { + roaring_bitmap_t *r = roaring_bitmap_create(); + uint32_t values[intvlen * count]; + for (int64_t i = 0; i < count; i++) { + for (uint32_t j = 0; j < intvlen; j++) { + values[i * intvlen + j] = offsets[i] + j; + } + } + RDTSC_START(cycles_start); + for (int64_t i = 0; i < count; i++) { + roaring_bitmap_add_many(r, intvlen, values + (i * intvlen)); + } + RDTSC_FINAL(cycles_final); + results[p] = (cycles_final - cycles_start) * 1.0 / count / intvlen; + roaring_bitmap_free(r); + } + printf(" %6.1f\n", array_min(results, num_passes)); + + printf(" roaring_bitmap_add_bulk():"); for (int p = 0; p < num_passes; p++) { roaring_bitmap_t *r = roaring_bitmap_create(); RDTSC_START(cycles_start); - uint32_t values[intvlen]; + roaring_bulk_context_t context = {0}; for (int64_t i = 0; i < count; i++) { for (uint32_t j = 0; j < intvlen; j++) { - values[j] = offsets[i] + j; + roaring_bitmap_add_bulk(r, &context, offsets[i] + j); } - roaring_bitmap_add_many(r, intvlen, values); } RDTSC_FINAL(cycles_final); results[p] = (cycles_final - cycles_start) * 1.0 / count / intvlen; diff --git a/benchmarks/benchmark.h b/benchmarks/benchmark.h index fee613fd9..e3a6ad166 100644 --- a/benchmarks/benchmark.h +++ b/benchmarks/benchmark.h @@ -37,69 +37,39 @@ (cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \ } while (0) -#elif defined(__linux__) && 
defined(__GLIBC__) - -#include -#ifdef CLOCK_THREAD_CPUTIME_ID -#define RDTSC_START(cycles) \ - do { \ - struct timespec ts; \ - clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); \ - cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \ - } while (0) - -#define RDTSC_FINAL(cycles) \ - do { \ - struct timespec ts; \ - clock_gettime(CLOCK_REALTIME, &ts); \ - cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \ - } while (0) - -#elif defined(CLOCK_REALTIME) // #ifdef CLOCK_THREAD_CPUTIME_ID -#define RDTSC_START(cycles) \ - do { \ - struct timespec ts; \ - clock_gettime(CLOCK_REALTIME, &ts); \ - cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \ - } while (0) - -#define RDTSC_FINAL(cycles) \ - do { \ - struct timespec ts; \ - clock_gettime(CLOCK_REALTIME, &ts); \ - cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \ - } while (0) - -#else -#define RDTSC_START(cycles) \ - do { \ - cycles = clock(); \ - } while(0) +#else // defined(CROARING_IS_X64) && defined(ROARING_INLINE_ASM) + +#if defined(CLOCK_THREAD_CPUTIME_ID) +#define RDTSC_CLOCK_ID CLOCK_THREAD_CPUTIME_ID +#elif defined(CLOCK_MONOTONIC) +#define RDTSC_CLOCK_ID CLOCK_MONOTONIC +#elif defined(CLOCK_REALTIME) +#define RDTSC_CLOCK_ID CLOCK_REALTIME +#endif -#define RDTSC_FINAL(cycles) \ - do { \ - cycles = clock(); \ - } while(0) +#if defined(RDTSC_CLOCK_ID) +#define RDTSC_START(cycles) \ + do { \ + struct timespec ts; \ + clock_gettime(RDTSC_CLOCK_ID, &ts); \ + cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \ + } while (0) -#endif // #ifdef CLOCK_THREAD_CPUTIME_ID +#define RDTSC_FINAL(cycles) RDTSC_START(cycles) -#else +#else // defined(RDTSC_CLOCK_ID) /** -* Other architectures do not support rdtsc ? 
+* Fall back to the `clock` function */ -#include - #define RDTSC_START(cycles) \ do { \ cycles = clock(); \ } while (0) -#define RDTSC_FINAL(cycles) \ - do { \ - cycles = clock(); \ - } while (0) +#define RDTSC_FINAL(cycles) RDTSC_START(cycles) +#endif #endif /* diff --git a/benchmarks/containsmulti_benchmark.c b/benchmarks/containsmulti_benchmark.c new file mode 100644 index 000000000..e92d82e5a --- /dev/null +++ b/benchmarks/containsmulti_benchmark.c @@ -0,0 +1,121 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include "benchmark.h" +#include "random.h" +#include "numbersfromtextfiles.h" + +void contains_multi_via_contains(roaring_bitmap_t* bm, const uint32_t* values, bool* results, const size_t count) { + for (size_t i = 0; i < count; ++i) { + results[i] = roaring_bitmap_contains(bm, values[i]); + } +} + +void contains_multi_bulk(roaring_bitmap_t* bm, const uint32_t* values, bool* results, const size_t count) { + roaring_bulk_context_t context = {0}; + for (size_t i = 0; i < count; ++i) { + results[i] = roaring_bitmap_contains_bulk(bm, &context, values[i]); + } +} + +int compare_uint32(const void* a, const void* b) { + uint32_t arg1 = *(const uint32_t*)a; + uint32_t arg2 = *(const uint32_t*)b; + if (arg1 < arg2) return -1; + if (arg1 > arg2) return 1; + return 0; +} + +int main(int argc, char* argv[]) { + (void)&read_all_integer_files; // suppress unused warning + + if (argc < 2) { + printf("Usage: %s ...\n", argv[0]); + printf("Example: %s ~/CRoaring/benchmarks/realdata/weather_sept_85/*\n", argv[0]); + return 1; + } + + size_t fields = argc-1; + uint32_t* values[argc]; + size_t count[argc]; + + roaring_bitmap_t* bm = roaring_bitmap_create(); + for (int i = 1; i < argc; i++) { + size_t t_count = 0; + uint32_t* t_values = read_integer_file(argv[i], &t_count); + if (t_count == 0) { + printf("No integers found in %s\n", argv[i]); + return 1; + } + roaring_bitmap_add_many(bm, t_count, t_values); + + shuffle_uint32(t_values, t_count); + 
+ values[i-1] = t_values; + count[i-1] = t_count; + } + //roaring_bitmap_run_optimize(bm); + + printf("Data:\n"); + printf(" cardinality: %"PRIu64"\n", roaring_bitmap_get_cardinality(bm)); + printf(" buckets: %d\n", (int)bm->high_low_container.size); + printf(" range: %"PRIu32"-%"PRIu32"\n", roaring_bitmap_minimum(bm) >> 16, roaring_bitmap_maximum(bm) >> 16); + + const int num_passes = 10; + printf("Cycles/element: %d\n", num_passes); + uint64_t cycles_start, cycles_final; + + printf(" roaring_bitmap_contains:"); + for (int p = 0; p < num_passes; p++) { + bool result[count[p]]; + RDTSC_START(cycles_start); + contains_multi_via_contains(bm, values[p], result, count[p]); + RDTSC_FINAL(cycles_final); + printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]); + } + printf("\n"); + + printf(" roaring_bitmap_contains_bulk:"); + for (int p = 0; p < num_passes; p++) { + bool result[count[p]]; + RDTSC_START(cycles_start); + contains_multi_bulk(bm, values[p], result, count[p]); + RDTSC_FINAL(cycles_final); + printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]); + } + printf("\n"); + + // sort input array + for (size_t i = 0; i < fields; ++i) { + qsort(values[i], count[i], sizeof(uint32_t), compare_uint32); + } + + printf(" roaring_bitmap_contains with sorted input:"); + for (int p = 0; p < num_passes; p++) { + bool result[count[p]]; + RDTSC_START(cycles_start); + contains_multi_via_contains(bm, values[p], result, count[p]); + RDTSC_FINAL(cycles_final); + printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]); + } + printf("\n"); + + printf(" roaring_bitmap_contains_bulk with sorted input:"); + for (int p = 0; p < num_passes; p++) { + bool result[count[p]]; + RDTSC_START(cycles_start); + contains_multi_bulk(bm, values[p], result, count[p]); + RDTSC_FINAL(cycles_final); + printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]); + } + printf("\n"); + + roaring_bitmap_free(bm); + for (size_t i = 0; i < fields; ++i) { + free(values[i]); + } + 
return 0; +} diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h index e82d05b1b..415152445 100644 --- a/include/roaring/roaring.h +++ b/include/roaring/roaring.h @@ -257,9 +257,48 @@ void roaring_bitmap_andnot_inplace(roaring_bitmap_t *r1, */ void roaring_bitmap_free(const roaring_bitmap_t *r); +/** + * A bit of context usable with `roaring_bitmap_*_bulk()` functions + * + * Should be initialized with `{0}` (or `memset()` to all zeros). + * Callers should treat it as an opaque type. + * + * A context may only be used with a single bitmap + * (unless re-initialized to zero), and any modification to a bitmap + * (other than modifications performed with `_bulk()` functions with the context + * passed) will invalidate any contexts associated with that bitmap. + */ +typedef struct roaring_bulk_context_s { + ROARING_CONTAINER_T *container; + int idx; + uint16_t key; + uint8_t typecode; +} roaring_bulk_context_t; + +/** + * Add an item, using context from a previous insert for speed optimization. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `*context` should be zero-initialized before the first + * call to this function. + * + * Modifying the bitmap in any way (other than `-bulk` suffixed functions) + * will invalidate the stored context, calling this function with a non-zero + * context after doing any modification invokes undefined behavior. + * + * In order to exploit this optimization, the caller should call this function + * with values with the same "key" (high 16 bits of the value) consecutively. 
+ */ +void roaring_bitmap_add_bulk(roaring_bitmap_t *r, + roaring_bulk_context_t *context, uint32_t val); + /** * Add value n_args from pointer vals, faster than repeatedly calling * `roaring_bitmap_add()` + * + * In order to exploit this optimization, the caller should attempt to keep + * values with the same "key" (high 16 bits of the value) as consecutive + * elements in `vals` */ void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, const uint32_t *vals); @@ -335,6 +374,25 @@ bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end); +/** + * Check if an item is present, using context from a previous call for speed + * optimization. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `*context` should be zero-initialized before the first + * call to this function. + * + * Modifying the bitmap in any way (other than `_bulk` suffixed functions) + * will invalidate the stored context, calling this function with a non-zero + * context after doing any modification invokes undefined behavior. + * + * In order to exploit this optimization, the caller should call this function + * with values with the same "key" (high 16 bits of the value) consecutively. + */ +bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r, + roaring_bulk_context_t *context, + uint32_t val); + /** * Get the cardinality of the bitmap (number of elements). 
*/ @@ -814,4 +872,3 @@ uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, using namespace ::roaring::api; #endif #endif - diff --git a/include/roaring/roaring_array.h b/include/roaring/roaring_array.h index fd201662b..24ce7cad2 100644 --- a/include/roaring/roaring_array.h +++ b/include/roaring/roaring_array.h @@ -93,7 +93,9 @@ inline container_t *ra_get_container_at_index( /** * Retrieves the key at index i */ -uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i); +inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) { + return ra->keys[i]; +} /** * Add a new key-value pair at index i diff --git a/src/roaring.c b/src/roaring.c index 303f727c1..cc717bb29 100644 --- a/src/roaring.c +++ b/src/roaring.c @@ -87,46 +87,91 @@ bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap) { return ra_init_with_capacity(&r->high_low_container, cap); } +static inline void add_bulk_impl(roaring_bitmap_t *r, + roaring_bulk_context_t *context, + uint32_t val) { + uint16_t key = val >> 16; + if (context->container == NULL || context->key != key) { + uint8_t typecode; + int idx; + context->container = containerptr_roaring_bitmap_add( + r, val, &typecode, &idx); + context->typecode = typecode; + context->idx = idx; + context->key = key; + } else { + // no need to seek the container, it is at hand + // because we already have the container at hand, we can do the + // insertion directly, bypassing the roaring_bitmap_add call + uint8_t new_typecode; + container_t *container2 = container_add( + context->container, val & 0xFFFF, context->typecode, &new_typecode); + if (container2 != context->container) { + // rare instance when we need to change the container type + container_free(context->container, context->typecode); + ra_set_container_at_index(&r->high_low_container, context->idx, + container2, new_typecode); + context->typecode = new_typecode; + context->container = container2; + } + } +} void 
roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, const uint32_t *vals) { - container_t *container = NULL; // hold value of last container touched - uint8_t typecode = 0; // typecode of last container touched - uint32_t prev = 0; // previous valued inserted - size_t i = 0; // index of value - int containerindex = 0; - if (n_args == 0) return; uint32_t val; - memcpy(&val, vals + i, sizeof(val)); - container = - containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex); - prev = val; - i++; - for (; i < n_args; i++) { - memcpy(&val, vals + i, sizeof(val)); - if (((prev ^ val) >> 16) == - 0) { // no need to seek the container, it is at hand - // because we already have the container at hand, we can do the - // insertion - // automatically, bypassing the roaring_bitmap_add call - uint8_t newtypecode = typecode; - container_t *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { // rare instance when we need to - // change the container type - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, - containerindex, container2, - newtypecode); - typecode = newtypecode; - container = container2; - } - } else { - container = containerptr_roaring_bitmap_add(r, val, &typecode, - &containerindex); + const uint32_t *start = vals; + const uint32_t *end = vals + n_args; + const uint32_t *current_val = start; + + if (n_args == 0) { + return; + } + + uint8_t typecode; + int idx; + container_t *container; + val = *current_val; + container = containerptr_roaring_bitmap_add(r, val, &typecode, &idx); + roaring_bulk_context_t context = {container, idx, (uint16_t)(val >> 16), typecode}; + + for (; current_val != end; current_val++) { + memcpy(&val, current_val, sizeof(val)); + add_bulk_impl(r, &context, val); + } +} + +void roaring_bitmap_add_bulk(roaring_bitmap_t *r, + roaring_bulk_context_t *context, uint32_t val) { + add_bulk_impl(r, context, val); +} + +bool 
roaring_bitmap_contains_bulk(const roaring_bitmap_t *r, + roaring_bulk_context_t *context, + uint32_t val) +{ + uint16_t key = val >> 16; + if (context->container == NULL || context->key != key) { + int32_t start_idx = -1; + if (context->container != NULL && context->key < key) { + start_idx = context->idx; + } + int idx = ra_advance_until(&r->high_low_container, key, start_idx); + if (idx == ra_get_size(&r->high_low_container)) { + return false; + } + uint8_t typecode; + context->container = ra_get_container_at_index(&r->high_low_container, idx, &typecode); + context->typecode = typecode; + context->idx = idx; + context->key = ra_get_key_at_index(&r->high_low_container, idx); + // ra_advance_until finds the next key >= the target, we found a later container. + if (context->key != key) { + return false; } - prev = val; } + // context is now set up + return container_contains(context->container, val & 0xFFFF, context->typecode); } roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) { @@ -139,11 +184,12 @@ roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) 
{ // todo: could be greatly optimized but we do not expect this call to ever // include long lists roaring_bitmap_t *answer = roaring_bitmap_create(); + roaring_bulk_context_t context = {0}; va_list ap; va_start(ap, n_args); - for (size_t i = 1; i <= n_args; i++) { + for (size_t i = 0; i < n_args; i++) { uint32_t val = va_arg(ap, uint32_t); - roaring_bitmap_add(answer, val); + roaring_bitmap_add_bulk(answer, &context, val); } va_end(ap); return answer; @@ -1413,14 +1459,24 @@ size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) { const char *bufaschar = (const char *)buf; - if (*(const unsigned char *)buf == CROARING_SERIALIZATION_ARRAY_UINT32) { + if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) { /* This looks like a compressed set of uint32_t elements */ uint32_t card; memcpy(&card, bufaschar + 1, sizeof(uint32_t)); const uint32_t *elems = (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); - - return roaring_bitmap_of_ptr(card, elems); + roaring_bitmap_t *bitmap = roaring_bitmap_create(); + if (bitmap == NULL) { + return NULL; + } + roaring_bulk_context_t context = {0}; + for (uint32_t i = 0; i < card; i++) { + // elems may not be aligned, read with memcpy + uint32_t elem; + memcpy(&elem, elems + i, sizeof(elem)); + roaring_bitmap_add_bulk(bitmap, &context, elem); + } + return bitmap; } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) { return roaring_bitmap_portable_deserialize(bufaschar + 1); } else diff --git a/src/roaring_array.c b/src/roaring_array.c index 2e1b2c671..5151e7062 100644 --- a/src/roaring_array.c +++ b/src/roaring_array.c @@ -319,9 +319,8 @@ extern inline container_t *ra_get_container_at_index( const roaring_array_t *ra, uint16_t i, uint8_t *typecode); -uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) { - return ra->keys[i]; -} +extern inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, + uint16_t i); extern inline 
int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c index 3a4de4b3b..011cc4011 100644 --- a/tests/toplevel_unit.c +++ b/tests/toplevel_unit.c @@ -57,6 +57,63 @@ DEFINE_TEST(range_contains) { roaring_bitmap_free(bm); } +DEFINE_TEST(contains_bulk) { + roaring_bitmap_t *bm = roaring_bitmap_create(); + roaring_bulk_context_t context = {0}; + + // Ensure checking an empty bitmap is okay + assert_true(!roaring_bitmap_contains_bulk(bm, &context, 0)); + assert_true(!roaring_bitmap_contains_bulk(bm, &context, 0xFFFFFFFF)); + + // create RLE container from [0, 1000] + roaring_bitmap_add_range_closed(bm, 0, 1000); + + // add array container from 77000 + for (uint32_t i = 77000; i < 87000; i+=2) { + roaring_bitmap_add(bm, i); + } + // add bitset container from 132000 + for (uint32_t i = 132000; i < 140000; i+=2) { + roaring_bitmap_add(bm, i); + } + + roaring_bitmap_add(bm, UINT32_MAX); + + uint32_t values[] = { + 1000, // 1 + 1001, // 0 + 77000, // 1 + 77001, // 0 + 77002, // 1 + 1002, // 0 + 132000, // 1 + 132001, // 0 + 132002, // 1 + 77003, // 0 + UINT32_MAX, // 1 + UINT32_MAX - 1, // 0 + }; + size_t test_count = sizeof(values) / sizeof(values[0]); + + for (size_t i = 0; i < test_count; i++) { + roaring_bulk_context_t empty_context = {0}; + bool expected_contains = roaring_bitmap_contains(bm, values[i]); + assert_true(expected_contains == roaring_bitmap_contains_bulk(bm, &empty_context, values[i])); + assert_true(expected_contains == roaring_bitmap_contains_bulk(bm, &context, values[i])); + + if (expected_contains) { + assert_int_equal(context.key, values[i] >> 16); + } + if (context.container != NULL) { + assert_in_range(context.idx, 0, bm->high_low_container.size - 1); + assert_ptr_equal(context.container, bm->high_low_container.containers[context.idx]); + assert_int_equal(context.key, bm->high_low_container.keys[context.idx]); + assert_int_equal(context.typecode, 
bm->high_low_container.typecodes[context.idx]); + } + } + roaring_bitmap_free(bm); +} + DEFINE_TEST(is_really_empty) { roaring_bitmap_t *bm = roaring_bitmap_create(); assert_true(roaring_bitmap_is_empty(bm)); @@ -94,10 +151,6 @@ void can_copy_empty(bool copy_on_write) { roaring_bitmap_free(bm2); } - - - - bool check_serialization(roaring_bitmap_t *bitmap) { const int32_t size = roaring_bitmap_portable_size_in_bytes(bitmap); char *data = (char *)malloc(size); @@ -109,7 +162,6 @@ bool check_serialization(roaring_bitmap_t *bitmap) { return ret; } - DEFINE_TEST(issue245) { roaring_bitmap_t *bitmap = roaring_bitmap_create(); const uint32_t targetEntries = 2048; @@ -888,6 +940,19 @@ DEFINE_TEST(test_addremove) { roaring_bitmap_free(bm); } +DEFINE_TEST(test_addremove_bulk) { + roaring_bitmap_t *bm = roaring_bitmap_create(); + roaring_bulk_context_t context = {0}; + for (uint32_t value = 33057; value < 147849; value += 8) { + roaring_bitmap_add_bulk(bm, &context, value); + } + for (uint32_t value = 33057; value < 147849; value += 8) { + assert_true(roaring_bitmap_remove_checked(bm, value)); + } + assert_true(roaring_bitmap_is_empty(bm)); + roaring_bitmap_free(bm); +} + DEFINE_TEST(test_addremoverun) { roaring_bitmap_t *bm = roaring_bitmap_create(); for (uint32_t value = 33057; value < 147849; value += 8) { @@ -4182,6 +4247,7 @@ int main() { cmocka_unit_test(issue208), cmocka_unit_test(issue208b), cmocka_unit_test(range_contains), + cmocka_unit_test(contains_bulk), cmocka_unit_test(inplaceorwide), cmocka_unit_test(test_contains_range), cmocka_unit_test(check_range_contains_from_end), @@ -4206,6 +4272,7 @@ int main() { cmocka_unit_test(test_maximum_minimum), cmocka_unit_test(test_stats), cmocka_unit_test(test_addremove), + cmocka_unit_test(test_addremove_bulk), cmocka_unit_test(test_addremoverun), cmocka_unit_test(test_basic_add), cmocka_unit_test(test_remove_withrun), From d56295437c33f3651a7c7d0977d992899da3ea55 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 26 
Aug 2022 16:27:05 -0400 Subject: [PATCH 005/162] Preparing new release --- CMakeLists.txt | 6 +++--- include/roaring/roaring_version.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d386aeab..007d911b6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,10 +17,10 @@ if(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSIO endif() set(ROARING_LIB_NAME roaring) set(PROJECT_VERSION_MAJOR 0) -set(PROJECT_VERSION_MINOR 6) +set(PROJECT_VERSION_MINOR 7) set(PROJECT_VERSION_PATCH 0) -set(ROARING_LIB_VERSION "0.6.0" CACHE STRING "Roaring library version") -set(ROARING_LIB_SOVERSION "4" CACHE STRING "Roaring library soversion") +set(ROARING_LIB_VERSION "0.7.0" CACHE STRING "Roaring library version") +set(ROARING_LIB_SOVERSION "5" CACHE STRING "Roaring library soversion") option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON) if(NOT ROARING_EXCEPTIONS) diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h index 8b37799c6..fe719d5f5 100644 --- a/include/roaring/roaring_version.h +++ b/include/roaring/roaring_version.h @@ -1,10 +1,10 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "0.6.0" +#define ROARING_VERSION "0.7.0" enum { ROARING_VERSION_MAJOR = 0, - ROARING_VERSION_MINOR = 6, + ROARING_VERSION_MINOR = 7, ROARING_VERSION_REVISION = 0 }; #endif // ROARING_INCLUDE_ROARING_VERSION From f73706df96d428f1a5e1deb175082b4474c03a14 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 26 Aug 2022 16:31:42 -0400 Subject: [PATCH 006/162] Update vs17-ci.yml --- .github/workflows/vs17-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/vs17-ci.yml b/.github/workflows/vs17-ci.yml index cc5b8451b..8d6e0d65e 100644 --- a/.github/workflows/vs17-ci.yml +++ 
b/.github/workflows/vs17-ci.yml @@ -12,7 +12,7 @@ jobs: include: - {gen: Visual Studio 17 2022, arch: Win32} - {gen: Visual Studio 17 2022, arch: x64} - steps: + steps: - name: checkout uses: actions/checkout@v2 - name: Configure @@ -24,4 +24,4 @@ jobs: - name: Run basic tests run: | cd build - ctest -C Release --output-on-failure \ No newline at end of file + ctest -C Release --output-on-failure From 74345633dcb55415e1c12307b90bf70066c0a8c9 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Mon, 29 Aug 2022 11:14:38 -0400 Subject: [PATCH 007/162] Use a single binary search for array contains_range (#385) * Use a single binary search for array contains_range * Correct logic --- include/roaring/containers/array.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/include/roaring/containers/array.h b/include/roaring/containers/array.h index 47bd93185..758703569 100644 --- a/include/roaring/containers/array.h +++ b/include/roaring/containers/array.h @@ -369,18 +369,24 @@ void array_container_offset(const array_container_t *c, //* Check whether a range of values from range_start (included) to range_end (excluded) is present. 
*/ static inline bool array_container_contains_range(const array_container_t *arr, uint32_t range_start, uint32_t range_end) { - + const int32_t range_count = range_end - range_start; const uint16_t rs_included = range_start; const uint16_t re_included = range_end - 1; - const uint16_t *carr = (const uint16_t *) arr->array; - - const int32_t start = advanceUntil(carr, -1, arr->cardinality, rs_included); - const int32_t end = advanceUntil(carr, start - 1, arr->cardinality, re_included); + // Empty range is always included + if (range_count <= 0) { + return true; + } + if (range_count > arr->cardinality) { + return false; + } - return (start < arr->cardinality) && (end < arr->cardinality) - && (((uint16_t)(end - start)) == re_included - rs_included) - && (carr[start] == rs_included) && (carr[end] == re_included); + const int32_t start = binarySearch(arr->array, arr->cardinality, rs_included); + // If this sorted array contains all items in the range: + // * the start item must be found + // * the last item in range range_count must exist, and be the expected end value + return (start >= 0) && (arr->cardinality >= start + range_count) && + (arr->array[start + range_count - 1] == re_included); } /* Returns the smallest value (assumes not empty) */ From 6c25108834be21735655db1bdde317d7133047f7 Mon Sep 17 00:00:00 2001 From: Eric Sproul Date: Wed, 7 Sep 2022 11:46:57 -0400 Subject: [PATCH 008/162] Fix truncated MIT license text (#389) Make the copy in license-comment.h match. --- LICENSE | 3 ++- src/license-comment.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/LICENSE b/LICENSE index 8f567d348..8b0ad80d7 100644 --- a/LICENSE +++ b/LICENSE @@ -232,4 +232,5 @@ PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER \ No newline at end of file +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/src/license-comment.h b/src/license-comment.h index 43d200549..d50fb11d4 100644 --- a/src/license-comment.h +++ b/src/license-comment.h @@ -46,7 +46,7 @@ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE + * DEALINGS IN THE SOFTWARE. * * SPDX-License-Identifier: MIT */ From 6bdbd81ec094f4ffc255b4c95673a6c1f573bbb9 Mon Sep 17 00:00:00 2001 From: Greg Sadetsky Date: Wed, 7 Sep 2022 11:47:55 -0400 Subject: [PATCH 009/162] Fix link in Readme (#388) --- tests/testdata/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testdata/README.md b/tests/testdata/README.md index cecbb6981..62e730066 100644 --- a/tests/testdata/README.md +++ b/tests/testdata/README.md @@ -1,4 +1,4 @@ # test data These bitmaps were generated from Java : -https://github.com/RoaringBitmap/RoaringBitmap/blob/master/examples/SerializeToDiskExample.java +https://github.com/RoaringBitmap/RoaringBitmap/blob/master/examples/src/main/java/SerializeToDiskExample.java From 697c014af3f3f8d85d42d99739d6436e5b6bb809 Mon Sep 17 00:00:00 2001 From: Soerian Lieve Date: Wed, 7 Sep 2022 16:49:36 +0100 Subject: [PATCH 010/162] Add doublechecked::Roaring64Map + tests, fix issues found by it (#387) * Use roaring::Roaring inside namespace * Check for invalid range in Roaring64Map::flip * Add doublechecked::Roaring64Map and tests * Skip empty values in Roaring64Map::isSubset * Fix an overflow when run_end == uint16_t max Previously, when run_end == uint16_t max value, this would loop forever. 
--- cpp/roaring64map.hh | 9 +- src/containers/convert.c | 3 +- tests/cpp_random_unit.cpp | 231 ++++++++++++++- tests/cpp_unit.cpp | 20 ++ tests/roaring64map_checked.hh | 515 ++++++++++++++++++++++++++++++++++ 5 files changed, 762 insertions(+), 16 deletions(-) create mode 100644 tests/roaring64map_checked.hh diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh index 65596f838..1e51824c3 100644 --- a/cpp/roaring64map.hh +++ b/cpp/roaring64map.hh @@ -18,10 +18,11 @@ A C++ header for 64-bit Roaring Bitmaps, implemented by way of a map of many #include #include "roaring.hh" -using roaring::Roaring; namespace roaring { +using roaring::Roaring; + class Roaring64MapSetBitForwardIterator; class Roaring64MapSetBitBiDirectionalIterator; @@ -436,6 +437,9 @@ public: */ bool isSubset(const Roaring64Map &r) const { for (const auto &map_entry : roarings) { + if (map_entry.second.isEmpty()) { + continue; + } auto roaring_iter = r.roarings.find(map_entry.first); if (roaring_iter == r.roarings.cend()) return false; @@ -522,6 +526,9 @@ public: * areas outside the range are passed through unchanged. 
*/ void flip(uint64_t range_start, uint64_t range_end) { + if (range_start >= range_end) { + return; + } uint32_t start_high = highBytes(range_start); uint32_t start_low = lowBytes(range_start); uint32_t end_high = highBytes(range_end); diff --git a/src/containers/convert.c b/src/containers/convert.c index b60ac4773..a87babff0 100644 --- a/src/containers/convert.c +++ b/src/containers/convert.c @@ -101,10 +101,11 @@ container_t *convert_to_bitset_or_array_container( for (int rlepos = 0; rlepos < rc->n_runs; ++rlepos) { uint16_t run_start = rc->runs[rlepos].value; uint16_t run_end = run_start + rc->runs[rlepos].length; - for (uint16_t run_value = run_start; run_value <= run_end; + for (uint16_t run_value = run_start; run_value < run_end; ++run_value) { answer->array[answer->cardinality++] = run_value; } + answer->array[answer->cardinality++] = run_end; } assert(card == answer->cardinality); *resulttype = ARRAY_CONTAINER_TYPE; diff --git a/tests/cpp_random_unit.cpp b/tests/cpp_random_unit.cpp index 37000ebf5..0cc29c6e1 100644 --- a/tests/cpp_random_unit.cpp +++ b/tests/cpp_random_unit.cpp @@ -1,9 +1,10 @@ // // cpp_random_unit.cpp // -// The `roaring_checked.hh` variation of the C++ wrapper for roaring bitmaps -// keeps a C++ `std::set` in sync with changes made using the object's methods. -// That class has the same name (Roaring) and is in namespace `doublecheck`. +// The `roaring_checked.hh` / `roaring64map_checked.hh` variations of the C++ +// wrapper for roaring bitmaps keep a C++ `std::set` in sync with changes made +// using the object's methods. Those classes have the same name and are in +// namespace `doublechecked`. // // This test generates bitsets with randomized content and runs through the // various operations with them. 
@@ -19,22 +20,23 @@ // https://www.llvm.org/docs/LibFuzzer.html // -#include #include #include #include #include #include -#include +#include +#include #include #include "roaring_checked.hh" using doublechecked::Roaring; // so `Roaring` means `doublecheck::Roaring` +#include "roaring64map_checked.hh" +using doublechecked::Roaring64Map; #include "test.h" - // The tests can run as long as one wants. Ideally, the sanitizer options // for `address` and `undefined behavior` should be enabled (see the CMake // option ROARING_SANITIZE). @@ -46,12 +48,12 @@ const unsigned long NUM_STEPS = 1000; // const int NUM_ROARS = 30; -// If we generated data fully at random in the uint32_t space, then sets would -// be unlikely to intersect very often. Use a rolling focal point to kind of -// distribute the values near enough to each other to be likely to interfere. -// +// If we generated data fully at random in the uint32_t / uint64_t space, then +// sets would be unlikely to intersect very often. Use a rolling focal point to +// kind of distribute the values near enough to each other to be likely to +// interfere. 
uint32_t gravity; - +uint64_t gravity64; Roaring make_random_bitset() { Roaring r; @@ -74,7 +76,7 @@ Roaring make_random_bitset() { case 3: { uint32_t start = gravity + (rand() % 50) - 25; - r.flip(start, rand() % 50); + r.flip(start, start + rand() % 50); break; } case 4: { // tests remove(), select(), rank() @@ -97,6 +99,53 @@ Roaring make_random_bitset() { return r; } +Roaring64Map make_random_bitset64() { + Roaring64Map r; + int num_ops = rand() % 100; + for (int i = 0; i < num_ops; ++i) { + switch (rand() % 5) { + case 0: + r.add(gravity64); + break; + + case 1: { + uint64_t start = gravity64 + (rand() % 50) - 25; + r.addRange(start, start + rand() % 100); + break; + } + + case 2: { + uint64_t start = gravity64 + (rand() % 10) - 5; + r.removeRange(start, start + rand() % 5); + break; + } + + case 3: { + uint64_t start = gravity64 + (rand() % 50) - 25; + r.flip(start, start + rand() % 50); + break; + } + + case 4: { // tests remove(), select(), rank() + uint64_t card = r.cardinality(); + if (card != 0) { + uint64_t rnk = rand() % card; + uint64_t element; + assert_true(r.select(rnk, &element)); + assert_int_equal(rnk + 1, r.rank(element)); + r.remove(rnk); + } + break; + } + + default: + assert_true(false); + } + gravity64 += (rand() % 200) - 100; + } + assert_true(r.does_std_set_match_roaring()); + return r; +} DEFINE_TEST(sanity_check_doublechecking) { Roaring r; @@ -120,6 +169,26 @@ DEFINE_TEST(sanity_check_doublechecking) { assert_true(r.does_std_set_match_roaring()); } +DEFINE_TEST(sanity_check_doublechecking_64) { + Roaring64Map r; + while (r.isEmpty()) r = make_random_bitset64(); + + // Pick a random element out of the guaranteed non-empty bitset + // + uint64_t rnk = rand() % r.cardinality(); + uint64_t element; + assert_true(r.select(rnk, &element)); + + // Deliberately get check (the std::set) out of sync to ensure match fails + // + r.check.erase(element); + assert_false(r.does_std_set_match_roaring()); + + // Put the std::set back in sync so the 
destructor doesn't assert + // + r.check.insert(element); + assert_true(r.does_std_set_match_roaring()); +} DEFINE_TEST(random_doublecheck_test) { // @@ -233,7 +302,7 @@ DEFINE_TEST(random_doublecheck_test) { gravity = element; } uint32_t start = gravity + (rand() % 50) - 25; - out.flip(start, rand() % 50); + out.flip(start, start + rand() % 50); break; } default: @@ -286,13 +355,147 @@ DEFINE_TEST(random_doublecheck_test) { } } +DEFINE_TEST(random_doublecheck_test_64) { + // + // Make a group of bitsets to choose from when performing operations. + // + std::vector roars; + for (int i = 0; i < NUM_ROARS; ++i) + roars.insert(roars.end(), make_random_bitset64()); + + for (unsigned long step = 0; step < NUM_STEPS; ++step) { + // + // Each step modifies the chosen `out` bitset...possibly just + // overwriting it completely. + // + Roaring64Map &out = roars[rand() % NUM_ROARS]; + + // The left and right bitsets may be used as inputs for operations. + // They can be a reference to the same object as out, or can be + // references to each other (which is good to test those conditions). 
+ // + const Roaring64Map &left = roars[rand() % NUM_ROARS]; + const Roaring64Map &right = roars[rand() % NUM_ROARS]; + +#ifdef ROARING_CPP_RANDOM_PRINT_STATUS + printf("[%lu]: %llu %llu %llu\n", step, + static_cast(left.cardinality()), + static_cast(right.cardinality()), + static_cast(out.cardinality())); +#endif + + int op = rand() % 6; + + switch (op) { + case 0: { // AND + out = left & right; + if (&out != &left) assert_true(out.isSubset(left)); + if (&out != &right) assert_true(out.isSubset(right)); + break; + } + + case 1: { // ANDNOT + out = left - right; + if (&out != &left) assert_true(out.isSubset(left)); + break; + } + + case 2: { // OR + out = left | right; + if (&out != &left) assert_true(left.isSubset(out)); + if (&out != &right) assert_true(right.isSubset(out)); + break; + } + + case 3: { // XOR + out = left ^ right; + break; + } + + case 4: { // FASTUNION + const Roaring64Map *inputs[3] = {&out, &left, &right}; + out = Roaring64Map::fastunion( + 3, inputs); // result checked internally + break; + } + + case 5: { // FLIP + uint64_t card = out.cardinality(); + if (card != 0) { // pick gravity point inside set somewhere + uint64_t rnk = rand() % card; + uint64_t element; + assert_true(out.select(rnk, &element)); + assert_int_equal(rnk + 1, out.rank(element)); + gravity64 = element; + } + uint64_t start = gravity64 + (rand() % 50) - 25; + out.flip(start, start + rand() % 50); + break; + } + + default: + assert_true(false); + } + + // Periodically apply a post-processing step to the out bitset + // + int post = rand() % 15; + switch (post) { + case 0: + out.removeRunCompression(); + break; + + case 1: + out.runOptimize(); + break; + + case 2: + out.shrinkToFit(); + break; + + default: + break; + } + + // Explicitly ask if the `std::set` matches the roaring bitmap in out + // + assert_true(out.does_std_set_match_roaring()); + + // Do some arbitrary query operations. 
No need to test the results, as + // the doublecheck code ensures the `std::set` matches internally. + // + out.isEmpty(); + out.minimum(); + out.maximum(); + for (int i = -50; i < 50; ++i) { + out.contains(gravity64 + i); + } + + // When doing random intersections, the tendency is that sets will + // lose all their data points over time. So empty sets are usually + // re-seeded with more data, but a few get through to test empty cases. + // + if (out.isEmpty() && (rand() % 10 != 0)) out = make_random_bitset64(); + } +} int main() { + uint64_t seed = time(nullptr); + srand(seed); + printf("Seed: %lu\n", seed); + gravity = rand() % 10000; // starting focal point + // Make the 64-bit gravity focus around the edge of a 32-bit value to better + // test edge cases. + gravity64 = (static_cast(rand()) << 32) + rand() % 20000 - 10000; + const struct CMUnitTest tests[] = { cmocka_unit_test(sanity_check_doublechecking), - cmocka_unit_test(random_doublecheck_test)}; + cmocka_unit_test(sanity_check_doublechecking_64), + cmocka_unit_test(random_doublecheck_test), + cmocka_unit_test(random_doublecheck_test_64), + }; return cmocka_run_group_tests(tests, NULL, NULL); } diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp index f7be3c922..7fe65fadb 100644 --- a/tests/cpp_unit.cpp +++ b/tests/cpp_unit.cpp @@ -1121,6 +1121,24 @@ DEFINE_TEST(test_cpp_flip_64) { } } +DEFINE_TEST(test_cpp_is_subset_64) { + Roaring64Map r1 = Roaring64Map::bitmapOf(1, uint64_t(1)); + Roaring64Map r2 = Roaring64Map::bitmapOf(1, uint64_t(1) << 32); + Roaring64Map r3 = r1 & r2; + assert_true(r3.isSubset(r1)); + assert_true(r3.isSubset(r2)); +} + +DEFINE_TEST(test_cpp_remove_run_compression) { + Roaring r; + uint32_t max = (std::numeric_limits::max)(); + for (uint32_t i = max - 10; i != 0; ++i) { + r.add(i); + } + r.runOptimize(); + r.removeRunCompression(); +} + // Returns true on success, false on exception. 
bool test64Deserialize(const std::string& filename) { std::ifstream in(TEST_DATA_DIR + filename, std::ios::binary); @@ -1222,6 +1240,8 @@ int main() { cmocka_unit_test(test_issue304), cmocka_unit_test(issue_336), cmocka_unit_test(issue_372), + cmocka_unit_test(test_cpp_is_subset_64), + cmocka_unit_test(test_cpp_remove_run_compression), }; return cmocka_run_group_tests(tests, NULL, NULL); } diff --git a/tests/roaring64map_checked.hh b/tests/roaring64map_checked.hh new file mode 100644 index 000000000..8455d3e9b --- /dev/null +++ b/tests/roaring64map_checked.hh @@ -0,0 +1,515 @@ +// +// roaring64map_checked.hh +// +// PURPOSE: +// +// This file implements a class which maintains a `class Roaring64Map` bitset in +// sync with a C++ `std::set` of 64-bit integers. It asserts if it ever +// notices a difference between the result the roaring bitset gives and the +// result that the set would give. +// +// The doublechecked class is a drop-in replacement for the plain C++ class. +// Hence any codebase that uses that class could act as a test...if it wished. +// +// USAGE: +// +// The checked class has the same name (Roaring64Map) in `namespace doublechecked`. 
+// So switching between versions could be done easily with a command-line +// `-D` setting for a #define, e.g.: +// +// #ifdef ROARING_DOUBLECHECK_CPP +// #include "roaring64map_checked.hh" +// using doublechecked::Roaring64Map; +// #else +// #include "roaring64map.hh" +// #endif + +#ifndef INCLUDE_ROARING_64_MAP_CHECKED_HH_ +#define INCLUDE_ROARING_64_MAP_CHECKED_HH_ + +#include + +#include +#include +#include +#include + +#include // sorted set, typically a red-black tree implementation +#include + +#define ROARING_CPP_NAMESPACE unchecked // can't be overridden if global +#include "roaring64map.hh" // contains Roaring64Map unchecked class + +namespace doublechecked { // put the checked class in its own namespace + +class Roaring64Map { + public: // members public to allow tests access to them + roaring::Roaring64Map plain; // ordinary Roaring64Map bitset wrapper class + std::set check; // contents kept in sync with `plain` + + public: + Roaring64Map() : plain() { + } + + Roaring64Map(size_t n, const uint32_t *data) : plain (n, data) { + for (size_t i = 0; i < n; ++i) + check.insert(data[i]); + } + + Roaring64Map(const Roaring64Map &r) { + plain = r.plain; + check = r.check; + } + + Roaring64Map(Roaring64Map &&r) noexcept { + plain = std::move(r.plain); + check = std::move(r.check); + } + + // This constructor is unique to doublecheck::Roaring64Map(), for making a + // doublechecked version from an unchecked version. Note that this alone + // is somewhat toothless for checking...e.g. running an operation and then + // accepting that all the values in it were correct doesn't do much. So + // the results of such constructions should be validated another way. + // + Roaring64Map(roaring::Roaring64Map &&other_plain) { + plain = std::move(other_plain); + for (auto value : plain) + check.insert(value); + } + + // Note: This does not call `::Roaring64Map::bitmapOf()` because variadics can't + // forward their parameters. But this is all the code does, so it's fine. 
+ // + static Roaring64Map bitmapOf(size_t n, ...) { + doublechecked::Roaring64Map ans; + va_list vl; + va_start(vl, n); + for (size_t i = 0; i < n; i++) { + ans.add(va_arg(vl, uint32_t)); + } + va_end(vl); + return ans; + } + + void add(uint32_t x) { + plain.add(x); + check.insert(x); + } + void add(uint64_t x) { + plain.add(x); + check.insert(x); + } + + bool addChecked(uint32_t x) { + bool ans = plain.addChecked(x); + bool was_in_set = check.insert(x).second; // insert -> pair + assert(ans == was_in_set); + (void)was_in_set; // unused besides assert + return ans; + } + bool addChecked(uint64_t x) { + bool ans = plain.addChecked(x); + bool was_in_set = check.insert(x).second; // insert -> pair + assert(ans == was_in_set); + (void)was_in_set; // unused besides assert + return ans; + } + + void addRange(const uint64_t x, const uint64_t y) { + if (x != y) { // repeat add_range_closed() cast and bounding logic + addRangeClosed(x, y - 1); + } + } + + void addRangeClosed(uint32_t min, uint32_t max) { + plain.addRangeClosed(min, max); + if (min <= max) { + for (uint32_t val = max; val != min - 1; --val) + check.insert(val); + } + } + void addRangeClosed(uint64_t min, uint64_t max) { + plain.addRangeClosed(min, max); + if (min <= max) { + for (uint64_t val = max; val != min - 1; --val) + check.insert(val); + } + } + + void addMany(size_t n_args, const uint32_t *vals) { + plain.addMany(n_args, vals); + for (size_t i = 0; i < n_args; ++i) + check.insert(vals[i]); + } + void addMany(size_t n_args, const uint64_t *vals) { + plain.addMany(n_args, vals); + for (size_t i = 0; i < n_args; ++i) + check.insert(vals[i]); + } + + void remove(uint32_t x) { + plain.remove(x); + check.erase(x); + } + void remove(uint64_t x) { + plain.remove(x); + check.erase(x); + } + + bool removeChecked(uint32_t x) { + bool ans = plain.removeChecked(x); + size_t num_removed = check.erase(x); + assert(ans == (num_removed == 1)); + (void)num_removed; // unused besides assert + return ans; + } + bool 
removeChecked(uint64_t x) { + bool ans = plain.removeChecked(x); + size_t num_removed = check.erase(x); + assert(ans == (num_removed == 1)); + (void)num_removed; // unused besides assert + return ans; + } + + void removeRange(const uint64_t x, const uint64_t y) { + if (x != y) { // repeat remove_range_closed() cast and bounding logic + removeRangeClosed(x, y - 1); + } + } + + void removeRangeClosed(uint32_t min, uint32_t max) { + plain.removeRangeClosed(min, max); + if (min <= max) { + check.erase(check.lower_bound(min), check.upper_bound(max)); + } + } + void removeRangeClosed(uint64_t min, uint64_t max) { + plain.removeRangeClosed(min, max); + if (min <= max) { + check.erase(check.lower_bound(min), check.upper_bound(max)); + } + } + + uint64_t maximum() const { + uint64_t ans = plain.maximum(); + assert(check.empty() ? ans == 0 : ans == *check.rbegin()); + return ans; + } + + uint64_t minimum() const { + uint64_t ans = plain.minimum(); + assert(check.empty() + ? ans == (std::numeric_limits::max)() + : ans == *check.begin()); + return ans; + } + + bool contains(uint32_t x) const { + bool ans = plain.contains(x); + assert(ans == (check.find(x) != check.end())); + return ans; + } + bool contains(uint64_t x) const { + bool ans = plain.contains(x); + assert(ans == (check.find(x) != check.end())); + return ans; + } + + + // This method is exclusive to `doublechecked::Roaring64Map` + // + bool does_std_set_match_roaring() const { + auto it_check = check.begin(); + auto it_check_end = check.end(); + auto it_plain = plain.begin(); + auto it_plain_end = plain.end(); + + for (; it_check != it_check_end; ++it_check, ++it_plain) { + if (it_plain == it_plain_end) + return false; + if (*it_check != *it_plain) + return false; + } + return it_plain == plain.end(); // should have visited all values + } + + ~Roaring64Map() { + assert(does_std_set_match_roaring()); // always check on destructor + } + + Roaring64Map &operator=(const Roaring64Map &r) { + plain = r.plain; + check = 
r.check; + return *this; + } + + Roaring64Map &operator=(Roaring64Map &&r) noexcept { + plain = std::move(r.plain); + check = std::move(r.check); + return *this; + } + + Roaring64Map &operator&=(const Roaring64Map &r) { + plain &= r.plain; + + auto it = check.begin(); + auto r_it = r.check.begin(); + while (it != check.end() && r_it != r.check.end()) { + if (*it < *r_it) { it = check.erase(it); } + else if (*r_it < *it) { ++r_it; } + else { ++it; ++r_it; } // overlapped + } + check.erase(it, check.end()); // erase rest of check not in r.check + + return *this; + } + + Roaring64Map &operator-=(const Roaring64Map &r) { + plain -= r.plain; + + for (auto value : r.check) + check.erase(value); // Note std::remove() is not for ordered sets + + return *this; + } + + Roaring64Map &operator|=(const Roaring64Map &r) { + plain |= r.plain; + + check.insert(r.check.begin(), r.check.end()); // won't add duplicates + + return *this; + } + + Roaring64Map &operator^=(const Roaring64Map &r) { + plain ^= r.plain; + + auto it = check.begin(); + auto it_end = check.end(); + auto r_it = r.check.begin(); + auto r_it_end = r.check.end(); + if (it == it_end) { check = r.check; } // this empty + else if (r_it == r_it_end) { } // r empty + else if (*it > *r.check.rbegin() || *r_it > *check.rbegin()) { + check.insert(r.check.begin(), r.check.end()); // obvious disjoint + } else while (r_it != r_it_end) { // may overlap + if (it == it_end) { check.insert(*r_it); ++r_it; } + else if (*it == *r_it) { // remove overlapping value + it = check.erase(it); // returns *following* iterator + ++r_it; + } + else if (*it < *r_it) { ++it; } // keep value from this + else { check.insert(*r_it); ++r_it; } // add value from r + } + + return *this; + } + + void swap(Roaring64Map &r) { + std::swap(r.plain, plain); + std::swap(r.check, check); + } + + uint64_t cardinality() const { + uint64_t ans = plain.cardinality(); + assert(ans == check.size()); + return ans; + } + + bool isEmpty() const { + bool ans = 
plain.isEmpty(); + assert(ans == check.empty()); + return ans; + } + + bool isSubset(const Roaring64Map &r) const { // is `this` subset of `r`? + bool ans = plain.isSubset(r.plain); + assert(ans == std::includes( + r.check.begin(), r.check.end(), // containing range + check.begin(), check.end() // range to test for containment + )); + return ans; + } + + bool isStrictSubset(const Roaring64Map &r) const { // is `this` subset of `r`? + bool ans = plain.isStrictSubset(r.plain); + assert(ans == (std::includes( + r.check.begin(), r.check.end(), // containing range + check.begin(), check.end() // range to test for containment + ) && r.check.size() > check.size())); + return ans; + } + + void toUint64Array(uint64_t *ans) const { + plain.toUint64Array(ans); + // TBD: doublecheck + } + + bool operator==(const Roaring64Map &r) const { + bool ans = (plain == r.plain); + assert(ans == (check == r.check)); + return ans; + } + + void flip(uint64_t range_start, uint64_t range_end) { + plain.flip(range_start, range_end); + + if (range_start < range_end) { + auto hint = check.lower_bound(range_start); // *hint stays as >= i + auto it_end = check.end(); + for (uint64_t i = range_start; i < range_end; ++i) { + if (hint == it_end || *hint > i) // i not present, so add + check.insert(hint, i); // leave hint past i + else // *hint == i, must adjust hint and erase + hint = check.erase(hint); // returns *following* iterator + } + } + } + + bool removeRunCompression() { + return plain.removeRunCompression(); + } + + bool runOptimize() { + return plain.runOptimize(); + } + + size_t shrinkToFit() { + return plain.shrinkToFit(); + } + + void iterate(roaring::api::roaring_iterator64 iterator, void *ptr) const { + plain.iterate(iterator, ptr); + assert(does_std_set_match_roaring()); // checks equivalent iteration + } + + bool select(uint64_t rnk, uint64_t *element) const { + bool ans = plain.select(rnk, element); + + auto it = check.begin(); + auto it_end = check.end(); + for (uint64_t i = 0; 
it != it_end && i < rnk; ++i) + ++it; + assert(ans == (it != it_end) && (ans ? *it == *element : true)); + + return ans; + } + + uint64_t rank(uint64_t x) const { + uint64_t ans = plain.rank(x); + + uint64_t count = 0; + auto it = check.begin(); + auto it_end = check.end(); + for (; it != it_end && *it <= x; ++it) + ++count; + assert(ans == count); + + return ans; + } + + size_t write(char *buf, bool portable = true) const { + return plain.write(buf, portable); + } + + static Roaring64Map read(const char *buf, bool portable = true) { + auto plain = roaring::Roaring64Map::read(buf, portable); + return Roaring64Map(std::move(plain)); + } + + static Roaring64Map readSafe(const char *buf, size_t maxbytes) { + auto plain = roaring::Roaring64Map::readSafe(buf, maxbytes); + return Roaring64Map(std::move(plain)); + } + + size_t getSizeInBytes(bool portable = true) const { + return plain.getSizeInBytes(portable); + } + + Roaring64Map operator&(const Roaring64Map &o) const { + Roaring64Map ans(plain & o.plain); + + Roaring64Map inplace(*this); + assert(ans == (inplace &= o)); // validate against in-place version + + return ans; + } + + Roaring64Map operator-(const Roaring64Map &o) const { + Roaring64Map ans(plain - o.plain); + + Roaring64Map inplace(*this); + assert(ans == (inplace -= o)); // validate against in-place version + + return ans; + } + + Roaring64Map operator|(const Roaring64Map &o) const { + Roaring64Map ans(plain | o.plain); + + Roaring64Map inplace(*this); + assert(ans == (inplace |= o)); // validate against in-place version + + return ans; + } + + Roaring64Map operator^(const Roaring64Map &o) const { + Roaring64Map ans(plain ^ o.plain); + + Roaring64Map inplace(*this); + assert(ans == (inplace ^= o)); // validate against in-place version + + return ans; + } + + void setCopyOnWrite(bool val) { + plain.setCopyOnWrite(val); + } + + void printf() const { + plain.printf(); + } + + std::string toString() const { + return plain.toString(); + } + + bool 
getCopyOnWrite() const { + return plain.getCopyOnWrite(); + } + + static Roaring64Map fastunion(size_t n, const Roaring64Map **inputs) { + auto plain_inputs = new const roaring::Roaring64Map*[n]; + for (size_t i = 0; i < n; ++i) + plain_inputs[i] = &inputs[i]->plain; + Roaring64Map ans(roaring::Roaring64Map::fastunion(n, plain_inputs)); + delete[] plain_inputs; + + if (n == 0) + assert(ans.cardinality() == 0); + else { + Roaring64Map temp = *inputs[0]; + for (size_t i = 1; i < n; ++i) + temp |= *inputs[i]; + assert(temp == ans); + } + + return ans; + } + + typedef roaring::Roaring64MapSetBitForwardIterator const_iterator; + + const_iterator begin() const { + return roaring::Roaring64MapSetBitForwardIterator(plain); + } + + const_iterator &end() const { + static roaring::Roaring64MapSetBitForwardIterator e(plain, true); + return e; + } +}; + +} // end `namespace doublechecked` + +#endif // INCLUDE_ROARING_64_MAP_CHECKED_HH_ From b9e137425be17a55eea0635c87f01acefcdeeda1 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 7 Sep 2022 11:50:47 -0400 Subject: [PATCH 011/162] Preparing release --- CMakeLists.txt | 4 ++-- include/roaring/roaring_version.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 007d911b6..7320d9848 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,8 +18,8 @@ endif() set(ROARING_LIB_NAME roaring) set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 7) -set(PROJECT_VERSION_PATCH 0) -set(ROARING_LIB_VERSION "0.7.0" CACHE STRING "Roaring library version") +set(PROJECT_VERSION_PATCH 1) +set(ROARING_LIB_VERSION "0.7.1" CACHE STRING "Roaring library version") set(ROARING_LIB_SOVERSION "5" CACHE STRING "Roaring library soversion") option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON) diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h index fe719d5f5..33cff59fa 100644 --- a/include/roaring/roaring_version.h +++ 
b/include/roaring/roaring_version.h @@ -1,10 +1,10 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "0.7.0" +#define ROARING_VERSION "0.7.1" enum { ROARING_VERSION_MAJOR = 0, ROARING_VERSION_MINOR = 7, - ROARING_VERSION_REVISION = 0 + ROARING_VERSION_REVISION = 1 }; #endif // ROARING_INCLUDE_ROARING_VERSION From c68ae9dd4f5dfc9fe94d1023f7cdeb61a4db0aee Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 26 Oct 2022 16:51:51 -0400 Subject: [PATCH 012/162] Update README.md --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 4589a8cbd..50f2b24d2 100644 --- a/README.md +++ b/README.md @@ -651,10 +651,11 @@ https://groups.google.com/forum/#!forum/roaring-bitmaps # References about Roaring -- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience (to appear) [arXiv:1709.07821](https://arxiv.org/abs/1709.07821) +- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience Volume 48, Issue 4 April 2018 Pages 867-895 + [arXiv:1709.07821](https://arxiv.org/abs/1709.07821) - Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin, Better bitmap performance with Roaring bitmaps, Software: Practice and Experience Volume 46, Issue 5, pages 709–719, May 2016 -http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/realroaring2014.html -- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience (accepted in 2016, to appear) 
http://arxiv.org/abs/1603.06549 +http://arxiv.org/abs/1402.6407 +- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience Volume 46, Issue 11, pages 1547-1569, November 2016 http://arxiv.org/abs/1603.06549 - Samy Chambi, Daniel Lemire, Robert Godin, Kamel Boukhalfa, Charles Allen, Fangjin Yang, Optimizing Druid with Roaring bitmaps, IDEAS 2016, 2016. http://r-libre.teluq.ca/950/ From a591cf3854713dfe1cb4a2bf159e413b92d95ca6 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 27 Oct 2022 08:41:57 -0400 Subject: [PATCH 013/162] Update README.md --- README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 50f2b24d2..705ddb790 100644 --- a/README.md +++ b/README.md @@ -651,11 +651,9 @@ https://groups.google.com/forum/#!forum/roaring-bitmaps # References about Roaring -- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience Volume 48, Issue 4 April 2018 Pages 867-895 - [arXiv:1709.07821](https://arxiv.org/abs/1709.07821) +- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience Volume 48, Issue 4 April 2018 Pages 867-895 [arXiv:1709.07821](https://arxiv.org/abs/1709.07821) - Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin, Better bitmap performance with Roaring bitmaps, -Software: Practice and Experience Volume 46, Issue 5, pages 709–719, May 2016 -http://arxiv.org/abs/1402.6407 -- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience Volume 46, Issue 11, pages 1547-1569, November 2016 http://arxiv.org/abs/1603.06549 
+Software: Practice and Experience Volume 46, Issue 5, pages 709–719, May 2016 [arXiv:1402.6407](http://arxiv.org/abs/1402.6407) +- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience Volume 46, Issue 11, pages 1547-1569, November 2016 [arXiv:1603.06549](http://arxiv.org/abs/1603.06549) - Samy Chambi, Daniel Lemire, Robert Godin, Kamel Boukhalfa, Charles Allen, Fangjin Yang, Optimizing Druid with Roaring bitmaps, IDEAS 2016, 2016. http://r-libre.teluq.ca/950/ From 6ef3a39f6db9ae0c9644918e8f8ae839090e8538 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 4 Nov 2022 11:00:45 -0400 Subject: [PATCH 014/162] Fix for issue 394. (#395) --- amalgamation.sh | 4 ++-- include/roaring/isadetection.h | 6 ++++++ include/roaring/portability.h | 12 ++++++++++-- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/amalgamation.sh b/amalgamation.sh index 1281384ef..849f7327d 100755 --- a/amalgamation.sh +++ b/amalgamation.sh @@ -43,12 +43,12 @@ $SCRIPTPATH/cpp/roaring64map.hh " # internal .h files => These are used in the implementation but aren't part of -# the API. They're all embedded at the head of the amalgamated C file, and +# the API. They are all embedded at the head of the amalgamated C file, and # need to be in this order. # ALL_PRIVATE_H=" -$SCRIPTPATH/include/roaring/isadetection.h $SCRIPTPATH/include/roaring/portability.h +$SCRIPTPATH/include/roaring/isadetection.h $SCRIPTPATH/include/roaring/containers/perfparameters.h $SCRIPTPATH/include/roaring/containers/container_defs.h $SCRIPTPATH/include/roaring/array_util.h diff --git a/include/roaring/isadetection.h b/include/roaring/isadetection.h index cfea20070..732903756 100644 --- a/include/roaring/isadetection.h +++ b/include/roaring/isadetection.h @@ -46,9 +46,15 @@ POSSIBILITY OF SUCH DAMAGE. 
#ifndef ROARING_ISADETECTION_H #define ROARING_ISADETECTION_H +// isadetection.h does not define any macro (except for ROARING_ISADETECTION_H). + #include #include #include + +// We need portability.h to be included first, see +// https://github.com/RoaringBitmap/CRoaring/issues/394 +#include #if CROARING_REGULAR_VISUAL_STUDIO #include #elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) diff --git a/include/roaring/portability.h b/include/roaring/portability.h index a72dcf6bc..3f43e97fe 100644 --- a/include/roaring/portability.h +++ b/include/roaring/portability.h @@ -46,7 +46,6 @@ #define _XOPEN_SOURCE 700 #endif // !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) -#include "isadetection.h" #include #include #include // will provide posix_memalign with _POSIX_C_SOURCE as defined above @@ -327,4 +326,13 @@ static inline int hamming(uint64_t x) { #define CROARING_UNTARGET_REGION #endif -#endif /* INCLUDE_PORTABILITY_H_ */ + +// We need portability.h to be included first, +// but we also always want isadetection.h to be +// included (right after). +// See https://github.com/RoaringBitmap/CRoaring/issues/394 +// There is no scenario where we want portability.h to +// be included, but not isadetection.h: the latter is a +// strict requirement. +#include // include it last! +#endif /* INCLUDE_PORTABILITY_H_ */ \ No newline at end of file From e6106589a86f7f9dcbef560ffd285f8534bacb3d Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 4 Nov 2022 11:02:15 -0400 Subject: [PATCH 015/162] Preparing release. 
--- CMakeLists.txt | 4 ++-- include/roaring/roaring_version.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7320d9848..ecd15f912 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,8 +18,8 @@ endif() set(ROARING_LIB_NAME roaring) set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 7) -set(PROJECT_VERSION_PATCH 1) -set(ROARING_LIB_VERSION "0.7.1" CACHE STRING "Roaring library version") +set(PROJECT_VERSION_PATCH 2) +set(ROARING_LIB_VERSION "0.7.2" CACHE STRING "Roaring library version") set(ROARING_LIB_SOVERSION "5" CACHE STRING "Roaring library soversion") option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON) diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h index 33cff59fa..12f856758 100644 --- a/include/roaring/roaring_version.h +++ b/include/roaring/roaring_version.h @@ -1,10 +1,10 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "0.7.1" +#define ROARING_VERSION "0.7.2" enum { ROARING_VERSION_MAJOR = 0, ROARING_VERSION_MINOR = 7, - ROARING_VERSION_REVISION = 1 + ROARING_VERSION_REVISION = 2 }; #endif // ROARING_INCLUDE_ROARING_VERSION From 6e67f976626196c1aa0d435aaccfe070b99457ce Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Sun, 6 Nov 2022 14:09:56 -0500 Subject: [PATCH 016/162] Unit test cleanups: change assert -> assert_true, a couple other small things (#396) --- tests/toplevel_unit.c | 190 +++++++++++++++++++++--------------------- 1 file changed, 95 insertions(+), 95 deletions(-) diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c index 011cc4011..05052ff31 100644 --- a/tests/toplevel_unit.c +++ b/tests/toplevel_unit.c @@ -31,7 +31,7 @@ static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) { } // arrays expected to both be sorted. 
-static int array_equals(uint32_t *a1, int32_t size1, uint32_t *a2, +static int array_equals(const uint32_t *a1, int32_t size1, const uint32_t *a2, int32_t size2) { if (size1 != size2) return 0; for (int i = 0; i < size1; ++i) { @@ -135,24 +135,24 @@ void can_copy_empty(bool copy_on_write) { roaring_bitmap_t *bm1 = roaring_bitmap_create(); roaring_bitmap_set_copy_on_write(bm1, copy_on_write); roaring_bitmap_t *bm2 = roaring_bitmap_copy(bm1); - assert(roaring_bitmap_get_cardinality(bm1) == 0); - assert(roaring_bitmap_get_cardinality(bm2) == 0); - assert(roaring_bitmap_is_empty(bm1)); - assert(roaring_bitmap_is_empty(bm2)); + assert_true(roaring_bitmap_get_cardinality(bm1) == 0); + assert_true(roaring_bitmap_get_cardinality(bm2) == 0); + assert_true(roaring_bitmap_is_empty(bm1)); + assert_true(roaring_bitmap_is_empty(bm2)); roaring_bitmap_add(bm1, 3); roaring_bitmap_add(bm2, 5); - assert(roaring_bitmap_get_cardinality(bm1) == 1); - assert(roaring_bitmap_get_cardinality(bm2) == 1); - assert(roaring_bitmap_contains(bm1,3)); - assert(roaring_bitmap_contains(bm2,5)); - assert(!roaring_bitmap_contains(bm2,3)); - assert(!roaring_bitmap_contains(bm1,5)); + assert_true(roaring_bitmap_get_cardinality(bm1) == 1); + assert_true(roaring_bitmap_get_cardinality(bm2) == 1); + assert_true(roaring_bitmap_contains(bm1,3)); + assert_true(roaring_bitmap_contains(bm2,5)); + assert_true(!roaring_bitmap_contains(bm2,3)); + assert_true(!roaring_bitmap_contains(bm1,5)); roaring_bitmap_free(bm1); roaring_bitmap_free(bm2); } bool check_serialization(roaring_bitmap_t *bitmap) { - const int32_t size = roaring_bitmap_portable_size_in_bytes(bitmap); + const size_t size = roaring_bitmap_portable_size_in_bytes(bitmap); char *data = (char *)malloc(size); roaring_bitmap_portable_serialize(bitmap, data); roaring_bitmap_t *deserializedBitmap = roaring_bitmap_portable_deserialize(data); @@ -197,7 +197,7 @@ DEFINE_TEST(issue208) { roaring_bitmap_add(r, i); } uint32_t rank = roaring_bitmap_rank(r, 63); - 
assert(rank == 32); + assert_true(rank == 32); roaring_bitmap_free(r); } @@ -212,7 +212,7 @@ DEFINE_TEST(issue208b) { for (uint32_t i = 65536 - 64; i < 65536; i++) { uint32_t expected = i - (65536 - 64) + 8196 / 2 + 1; uint32_t rank = roaring_bitmap_rank(r, i); - assert(rank == expected); + assert_true(rank == expected); } roaring_bitmap_free(r); } @@ -245,12 +245,12 @@ void can_add_to_copies(bool copy_on_write) { roaring_bitmap_set_copy_on_write(bm1, copy_on_write); roaring_bitmap_add(bm1, 3); roaring_bitmap_t *bm2 = roaring_bitmap_copy(bm1); - assert(roaring_bitmap_get_cardinality(bm1) == 1); - assert(roaring_bitmap_get_cardinality(bm2) == 1); + assert_true(roaring_bitmap_get_cardinality(bm1) == 1); + assert_true(roaring_bitmap_get_cardinality(bm2) == 1); roaring_bitmap_add(bm2, 4); roaring_bitmap_add(bm1, 5); - assert(roaring_bitmap_get_cardinality(bm1) == 2); - assert(roaring_bitmap_get_cardinality(bm2) == 2); + assert_true(roaring_bitmap_get_cardinality(bm1) == 2); + assert_true(roaring_bitmap_get_cardinality(bm2) == 2); roaring_bitmap_free(bm1); roaring_bitmap_free(bm2); } @@ -271,7 +271,7 @@ void convert_all_containers(roaring_bitmap_t* r, uint8_t dst_type) { r->high_low_container.containers[i] = dst_container; r->high_low_container.typecodes[i] = ARRAY_CONTAINER_TYPE; } - assert(r->high_low_container.typecodes[i] == ARRAY_CONTAINER_TYPE); + assert_true(r->high_low_container.typecodes[i] == ARRAY_CONTAINER_TYPE); // second step: convert ARRAY to dst_type if (dst_type == BITSET_CONTAINER_TYPE) { @@ -287,7 +287,7 @@ void convert_all_containers(roaring_bitmap_t* r, uint8_t dst_type) { r->high_low_container.containers[i] = dst_container; r->high_low_container.typecodes[i] = RUN_CONTAINER_TYPE; } - assert(r->high_low_container.typecodes[i] == dst_type); + assert_true(r->high_low_container.typecodes[i] == dst_type); } } @@ -519,7 +519,7 @@ DEFINE_TEST(check_iterate_to_end) { roaring_init_iterator(r1, &iterator); uint64_t count = 0; while(iterator.has_value) { - 
assert(iterator.current_value + (s - count) == bignumber); + assert_true(iterator.current_value + (s - count) == bignumber); count++; roaring_advance_uint32_iterator(&iterator); } @@ -539,7 +539,7 @@ DEFINE_TEST(check_iterate_to_beginning) { uint64_t count = 0; while(iterator.has_value) { count++; - assert(iterator.current_value + count == bignumber); + assert_true(iterator.current_value + count == bignumber); roaring_previous_uint32_iterator(&iterator); } assert_true(count == s); @@ -645,14 +645,14 @@ void test_example(bool copy_on_write) { // we can also go in reverse and go from arrays to bitmaps uint64_t card1 = roaring_bitmap_get_cardinality(r1); uint32_t *arr1 = (uint32_t *)malloc(card1 * sizeof(uint32_t)); - assert(arr1 != NULL); + assert_true(arr1 != NULL); roaring_bitmap_to_uint32_array(r1, arr1); // we can go from arrays to bitmaps from "offset" by "limit" size_t offset = 100; size_t limit = 1000; uint32_t *arr3 = (uint32_t *)malloc(limit * sizeof(uint32_t)); - assert(arr3 != NULL); + assert_true(arr3 != NULL); roaring_bitmap_range_uint32_array(r1, offset, limit, arr3); free(arr3); @@ -721,10 +721,10 @@ void test_example(bool copy_on_write) { roaring_bitmap_free(t); // we can also check whether there is a bitmap at a memory location without reading it size_t sizeofbitmap = roaring_bitmap_portable_deserialize_size(serializedbytes,expectedsize); - assert(sizeofbitmap == expectedsize); // sizeofbitmap would be zero if no bitmap were found + assert_true(sizeofbitmap == expectedsize); // sizeofbitmap would be zero if no bitmap were found // we can also read the bitmap "safely" by specifying a byte size limit: t = roaring_bitmap_portable_deserialize_safe(serializedbytes,expectedsize); - assert(roaring_bitmap_equals(r1, t)); // what we recover is equal + assert_true(roaring_bitmap_equals(r1, t)); // what we recover is equal roaring_bitmap_free(t); free(serializedbytes); @@ -904,19 +904,19 @@ void can_remove_from_copies(bool copy_on_write) { 
roaring_bitmap_set_copy_on_write(bm1, copy_on_write); roaring_bitmap_add(bm1, 3); roaring_bitmap_t *bm2 = roaring_bitmap_copy(bm1); - assert(roaring_bitmap_get_cardinality(bm1) == 1); - assert(roaring_bitmap_get_cardinality(bm2) == 1); + assert_true(roaring_bitmap_get_cardinality(bm1) == 1); + assert_true(roaring_bitmap_get_cardinality(bm2) == 1); roaring_bitmap_add(bm2, 4); roaring_bitmap_add(bm1, 5); - assert(roaring_bitmap_get_cardinality(bm1) == 2); - assert(roaring_bitmap_get_cardinality(bm2) == 2); + assert_true(roaring_bitmap_get_cardinality(bm1) == 2); + assert_true(roaring_bitmap_get_cardinality(bm2) == 2); roaring_bitmap_remove(bm1, 5); - assert(roaring_bitmap_get_cardinality(bm1) == 1); + assert_true(roaring_bitmap_get_cardinality(bm1) == 1); roaring_bitmap_remove(bm1, 4); - assert(roaring_bitmap_get_cardinality(bm1) == 1); - assert(roaring_bitmap_get_cardinality(bm2) == 2); + assert_true(roaring_bitmap_get_cardinality(bm1) == 1); + assert_true(roaring_bitmap_get_cardinality(bm2) == 2); roaring_bitmap_remove(bm2, 4); - assert(roaring_bitmap_get_cardinality(bm2) == 1); + assert_true(roaring_bitmap_get_cardinality(bm2) == 1); roaring_bitmap_free(bm1); roaring_bitmap_free(bm2); } @@ -1265,8 +1265,8 @@ DEFINE_TEST(test_portable_serialize) { arr2 = (uint32_t *)malloc(card2 * sizeof(uint32_t)); roaring_bitmap_to_uint32_array(r2, arr2); - assert(array_equals(arr1, card1, arr2, card2)); - assert(roaring_bitmap_equals(r1, r2)); + assert_true(array_equals(arr1, card1, arr2, card2)); + assert_true(roaring_bitmap_equals(r1, r2)); free(arr1); free(arr2); free(serialized); @@ -1531,9 +1531,9 @@ DEFINE_TEST(test_intersection_array_x_array) { DEFINE_TEST(test_intersection_array_x_array_inplace) { roaring_bitmap_t *r1 = roaring_bitmap_create(); - assert(r1); + assert_true(r1); roaring_bitmap_t *r2 = roaring_bitmap_create(); - assert(r2); + assert_true(r2); for (uint32_t i = 0; i < 100; ++i) { roaring_bitmap_add(r1, 2 * i); @@ -1554,9 +1554,9 @@ 
DEFINE_TEST(test_intersection_array_x_array_inplace) { DEFINE_TEST(test_intersection_bitset_x_bitset) { roaring_bitmap_t *r1 = roaring_bitmap_create(); - assert(r1); + assert_true(r1); roaring_bitmap_t *r2 = roaring_bitmap_create(); - assert(r2); + assert_true(r2); for (uint32_t i = 0; i < 20000; ++i) { roaring_bitmap_add(r1, 2 * i); @@ -1586,9 +1586,9 @@ DEFINE_TEST(test_intersection_bitset_x_bitset) { DEFINE_TEST(test_intersection_bitset_x_bitset_inplace) { roaring_bitmap_t *r1 = roaring_bitmap_create(); - assert(r1); + assert_true(r1); roaring_bitmap_t *r2 = roaring_bitmap_create(); - assert(r2); + assert_true(r2); for (uint32_t i = 0; i < 20000; ++i) { roaring_bitmap_add(r1, 2 * i); @@ -1612,10 +1612,10 @@ DEFINE_TEST(test_intersection_bitset_x_bitset_inplace) { void test_union(bool copy_on_write) { roaring_bitmap_t *r1 = roaring_bitmap_create(); roaring_bitmap_set_copy_on_write(r1, copy_on_write); - assert(r1); + assert_true(r1); roaring_bitmap_t *r2 = roaring_bitmap_create(); roaring_bitmap_set_copy_on_write(r2, copy_on_write); - assert(r2); + assert_true(r2); for (uint32_t i = 0; i < 100; ++i) { roaring_bitmap_add(r1, 2 * i); @@ -1652,8 +1652,8 @@ static roaring_bitmap_t *gen_bitmap(double start_density, for (int i = 0; i < universe_size; i += run_length) { d = start_density + i * density_gradient; double r = our_rand() / (double)OUR_RAND_MAX; - assert(r <= 1.0); - assert(r >= 0); + assert_true(r <= 1.0); + assert_true(r >= 0); if (r < d && !(i >= blank_range_start && i < blank_range_end)) for (int j = 0; j < run_length; ++j) roaring_bitmap_add(ans, i + j); } @@ -2611,7 +2611,7 @@ DEFINE_TEST(test_bitset_to_run) { } roaring_bitmap_t *r1 = make_roaring_from_array(ans, ans_ctr); - assert(roaring_bitmap_run_optimize(r1)); + assert_true(roaring_bitmap_run_optimize(r1)); uint64_t card = roaring_bitmap_get_cardinality(r1); uint32_t *arr = (uint32_t *)malloc(card * sizeof(uint32_t)); @@ -3197,8 +3197,8 @@ DEFINE_TEST(test_rand_flips) { double f3 = our_rand() / 
(double)OUR_RAND_MAX; int pos = (int)(f1 * f2 * f3 * range); // denser at the start, sparser at end - assert(pos < range); - assert(pos >= 0); + assert_true(pos < range); + assert_true(pos >= 0); roaring_bitmap_add(r, pos); input[pos] = 1; } @@ -3253,8 +3253,8 @@ DEFINE_TEST(test_inplace_rand_flips) { double f3 = our_rand() / (double)OUR_RAND_MAX; int pos = (int)(f1 * f2 * f3 * range); // denser at the start, sparser at end - assert(pos < range); - assert(pos >= 0); + assert_true(pos < range); + assert_true(pos >= 0); roaring_bitmap_add(r, pos); input[pos] = 1; } @@ -3340,7 +3340,7 @@ DEFINE_TEST(select_test) { double f3 = our_rand() / (double)OUR_RAND_MAX; uint32_t pos = (uint32_t)(f1 * f2 * f3 * range); // denser at the start, sparser at end - assert(pos < range); + assert_true(pos < range); roaring_bitmap_add(r, pos); input[pos] = 1; } @@ -3646,7 +3646,7 @@ void test_iterator_generate_data(uint32_t **values_out, uint32_t *count_out) { // max allowed value values[count++] = UINT32_MAX; - assert(count <= capacity); + assert_true(count <= capacity); *values_out = values; *count_out = count; } @@ -3660,8 +3660,8 @@ void read_compare(roaring_bitmap_t* r, const uint32_t* ref_values, uint32_t ref_ uint32_t* buffer = (uint32_t*)malloc( sizeof(uint32_t) * (step == UINT32_MAX ? 
65536 : step)); while (ref_count > 0) { - assert(iter->has_value == true); - assert(iter->current_value == ref_values[0]); + assert_true(iter->has_value == true); + assert_true(iter->current_value == ref_values[0]); uint32_t num_ask = step; if (step == UINT32_MAX) { @@ -3676,20 +3676,20 @@ void read_compare(roaring_bitmap_t* r, const uint32_t* ref_values, uint32_t ref_ } uint32_t num_got = roaring_read_uint32_iterator(iter, buffer, num_ask); - assert(num_got == minimum_uint32(num_ask, ref_count)); + assert_true(num_got == minimum_uint32(num_ask, ref_count)); for (uint32_t i = 0; i < num_got; i++) { - assert(ref_values[i] == buffer[i]); + assert_true(ref_values[i] == buffer[i]); } ref_values += num_got; ref_count -= num_got; } - assert(iter->has_value == false); - assert(iter->current_value == UINT32_MAX); + assert_true(iter->has_value == false); + assert_true(iter->current_value == UINT32_MAX); - assert(roaring_read_uint32_iterator(iter, buffer, step) == 0); - assert(iter->has_value == false); - assert(iter->current_value == UINT32_MAX); + assert_true(roaring_read_uint32_iterator(iter, buffer, step) == 0); + assert_true(iter->has_value == false); + assert_true(iter->current_value == UINT32_MAX); free(buffer); roaring_free_uint32_iterator(iter); @@ -3750,13 +3750,13 @@ void test_previous_iterator(uint8_t type) { uint32_t count = 0; do { - assert(iterator.has_value); + assert_true(iterator.has_value); ++count; - assert((int64_t)ref_count - (int64_t)count >= 0); // sanity check - assert(ref_values[ref_count - count] == iterator.current_value); + assert_true((int64_t)ref_count - (int64_t)count >= 0); // sanity check + assert_true(ref_values[ref_count - count] == iterator.current_value); } while (roaring_previous_uint32_iterator(&iterator)); - assert(ref_count == count); + assert_true(ref_count == count); roaring_bitmap_free(r); free(ref_values); @@ -3795,11 +3795,11 @@ void test_iterator_reuse_retry_count(int retry_count){ } // sanity checks - 
assert(roaring_bitmap_contains(with_edges, 0)); - assert(roaring_bitmap_contains(with_edges, UINT32_MAX)); - assert(!roaring_bitmap_contains(without_edges, 0)); - assert(!roaring_bitmap_contains(without_edges, UINT32_MAX)); - assert(roaring_bitmap_get_cardinality(with_edges) - 2 == roaring_bitmap_get_cardinality(without_edges)); + assert_true(roaring_bitmap_contains(with_edges, 0)); + assert_true(roaring_bitmap_contains(with_edges, UINT32_MAX)); + assert_true(!roaring_bitmap_contains(without_edges, 0)); + assert_true(!roaring_bitmap_contains(without_edges, UINT32_MAX)); + assert_true(roaring_bitmap_get_cardinality(with_edges) - 2 == roaring_bitmap_get_cardinality(without_edges)); const roaring_bitmap_t* bitmaps[] = {with_edges, without_edges}; int num_bitmaps = sizeof(bitmaps) / sizeof(bitmaps[0]); @@ -3807,7 +3807,7 @@ void test_iterator_reuse_retry_count(int retry_count){ for (int i = 0; i < num_bitmaps; ++i){ roaring_uint32_iterator_t iterator; roaring_init_iterator(bitmaps[i], &iterator); - assert(iterator.has_value); + assert_true(iterator.has_value); uint32_t first_value = iterator.current_value; uint32_t count = 0; @@ -3815,7 +3815,7 @@ void test_iterator_reuse_retry_count(int retry_count){ count++; roaring_advance_uint32_iterator(&iterator); } - assert(count == roaring_bitmap_get_cardinality(bitmaps[i])); + assert_true(count == roaring_bitmap_get_cardinality(bitmaps[i])); // Test advancing the iterator more times than necessary for (int retry = 0; retry < retry_count; ++retry) { @@ -3829,7 +3829,7 @@ void test_iterator_reuse_retry_count(int retry_count){ count++; roaring_previous_uint32_iterator(&iterator); } - assert(count == roaring_bitmap_get_cardinality(bitmaps[i])); + assert_true(count == roaring_bitmap_get_cardinality(bitmaps[i])); // Test decrement the iterator more times than necessary for (int retry = 0; retry < retry_count; ++retry) { @@ -3837,8 +3837,8 @@ void test_iterator_reuse_retry_count(int retry_count){ } 
roaring_advance_uint32_iterator(&iterator); - assert(iterator.has_value); - assert(first_value == iterator.current_value); + assert_true(iterator.has_value); + assert_true(first_value == iterator.current_value); } @@ -4006,8 +4006,8 @@ DEFINE_TEST(test_add_range) { roaring_bitmap_set_copy_on_write(r1, true); roaring_bitmap_t *r2 = roaring_bitmap_copy(r1); roaring_bitmap_add_range(r1, 0, 1); - assert(roaring_bitmap_get_cardinality(r1) == 1); - assert(roaring_bitmap_get_cardinality(r2) == 1); + assert_true(roaring_bitmap_get_cardinality(r1) == 1); + assert_true(roaring_bitmap_get_cardinality(r2) == 1); roaring_bitmap_free(r2); roaring_bitmap_free(r1); } @@ -4157,9 +4157,9 @@ DEFINE_TEST(test_remove_many) { sbs_add_range(sbs, 0, 65535); for (uint32_t v = 0; v <= 65535; v++) { sbs_remove_many(sbs, 1, &v); - assert(roaring_bitmap_get_cardinality(sbs->roaring) == 65535-v); + assert_true(roaring_bitmap_get_cardinality(sbs->roaring) == 65535-v); } - assert(sbs_is_empty(sbs)); + assert_true(sbs_is_empty(sbs)); sbs_free(sbs); } @@ -4172,22 +4172,22 @@ DEFINE_TEST(test_range_cardinality) { roaring_bitmap_add_range(r, s*2, s*10); // single container (minhb == maxhb) - assert(roaring_bitmap_range_cardinality(r, s*2, s*3) == s); - assert(roaring_bitmap_range_cardinality(r, s*2+100, s*3) == s-100); - assert(roaring_bitmap_range_cardinality(r, s*2, s*3-200) == s-200); - assert(roaring_bitmap_range_cardinality(r, s*2+100, s*3-200) == s-300); + assert_true(roaring_bitmap_range_cardinality(r, s*2, s*3) == s); + assert_true(roaring_bitmap_range_cardinality(r, s*2+100, s*3) == s-100); + assert_true(roaring_bitmap_range_cardinality(r, s*2, s*3-200) == s-200); + assert_true(roaring_bitmap_range_cardinality(r, s*2+100, s*3-200) == s-300); // multiple containers (maxhb > minhb) - assert(roaring_bitmap_range_cardinality(r, s*2, s*5) == s*3); - assert(roaring_bitmap_range_cardinality(r, s*2+100, s*5) == s*3-100); - assert(roaring_bitmap_range_cardinality(r, s*2, s*5-200) == s*3-200); - 
assert(roaring_bitmap_range_cardinality(r, s*2+100, s*5-200) == s*3-300); + assert_true(roaring_bitmap_range_cardinality(r, s*2, s*5) == s*3); + assert_true(roaring_bitmap_range_cardinality(r, s*2+100, s*5) == s*3-100); + assert_true(roaring_bitmap_range_cardinality(r, s*2, s*5-200) == s*3-200); + assert_true(roaring_bitmap_range_cardinality(r, s*2+100, s*5-200) == s*3-300); // boundary checks - assert(roaring_bitmap_range_cardinality(r, s*20, s*21) == 0); - assert(roaring_bitmap_range_cardinality(r, 100, 100) == 0); - assert(roaring_bitmap_range_cardinality(r, 0, s*7) == s*5); - assert(roaring_bitmap_range_cardinality(r, s*7, UINT64_MAX) == s*3); + assert_true(roaring_bitmap_range_cardinality(r, s*20, s*21) == 0); + assert_true(roaring_bitmap_range_cardinality(r, 100, 100) == 0); + assert_true(roaring_bitmap_range_cardinality(r, 0, s*7) == s*5); + assert_true(roaring_bitmap_range_cardinality(r, s*7, UINT64_MAX) == s*3); roaring_bitmap_free(r); } @@ -4200,8 +4200,8 @@ void frozen_serialization_compare(roaring_bitmap_t *r1) { const roaring_bitmap_t *r2 = roaring_bitmap_frozen_view(buf, num_bytes); - assert(roaring_bitmap_equals(r1, r2)); - assert(roaring_bitmap_frozen_view(buf+1, num_bytes-1) == NULL); + assert_true(roaring_bitmap_equals(r1, r2)); + assert_true(roaring_bitmap_frozen_view(buf+1, num_bytes-1) == NULL); roaring_bitmap_free(r1); roaring_bitmap_free(r2); @@ -4232,7 +4232,7 @@ DEFINE_TEST(test_frozen_serialization_max_containers) { for (int64_t i = 0; i < 65536; i++) { roaring_bitmap_add(r, 65536 * i); } - assert(r->high_low_container.size == 65536); + assert_true(r->high_low_container.size == 65536); frozen_serialization_compare(r); } From 690341827477d47c47910986f4070b3276fe01b9 Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Mon, 7 Nov 2022 14:30:04 -0500 Subject: [PATCH 017/162] Improve efficiency of outer map operations for Roaring64Map (#390) * Improve efficiency of overloaded operators of outer map operations * typo * Change to snake case --- 
cpp/roaring64map.hh | 237 ++++++++++++++++++++++++++++++++++++++------ tests/cpp_unit.cpp | 98 ++++++++++++++++++ 2 files changed, 304 insertions(+), 31 deletions(-) diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh index 1e51824c3..a769317cf 100644 --- a/cpp/roaring64map.hh +++ b/cpp/roaring64map.hh @@ -309,16 +309,58 @@ public: } /** - * Compute the intersection between the current bitmap and the provided - * bitmap, writing the result in the current bitmap. The provided bitmap - * is not modified. + * Compute the intersection of the current bitmap and the provided bitmap, + * writing the result in the current bitmap. The provided bitmap is not + * modified. */ - Roaring64Map &operator&=(const Roaring64Map &r) { - for (auto &map_entry : roarings) { - if (r.roarings.count(map_entry.first) == 1) - map_entry.second &= r.roarings.at(map_entry.first); - else - map_entry.second = Roaring(); + Roaring64Map &operator&=(const Roaring64Map &other) { + if (this == &other) { + // ANDing *this with itself is a no-op. + return *this; + } + + // Logic table summarizing what to do when a given outer key is + // present vs. absent from self and other. + // + // self other (self & other) work to do + // -------------------------------------------- + // absent absent empty None + // absent present empty None + // present absent empty Erase self + // present present empty or not Intersect self with other, but + // erase self if result is empty. + // + // Because there is only work to do when a key is present in 'self', the + // main for loop iterates over entries in 'self'. + + decltype(roarings.begin()) self_next; + for (auto self_iter = roarings.begin(); self_iter != roarings.end(); + self_iter = self_next) { + // Do the 'next' operation now, so we don't have to worry about + // invalidation of self_iter down below with the 'erase' operation. 
+ self_next = std::next(self_iter); + + auto self_key = self_iter->first; + auto &self_bitmap = self_iter->second; + + auto other_iter = other.roarings.find(self_key); + if (other_iter == other.roarings.end()) { + // 'other' doesn't have self_key. In the logic table above, + // this reflects the case (self.present & other.absent). + // So, erase self. + roarings.erase(self_iter); + continue; + } + + // Both sides have self_key. In the logic table above, this reflects + // the case (self.present & other.present). So, intersect self with + // other. + const auto &other_bitmap = other_iter->second; + self_bitmap &= other_bitmap; + if (self_bitmap.isEmpty()) { + // ...but if intersection is empty, remove it altogether. + roarings.erase(self_iter); + } } return *this; } @@ -328,44 +370,177 @@ public: * bitmap, writing the result in the current bitmap. The provided bitmap * is not modified. */ - Roaring64Map &operator-=(const Roaring64Map &r) { - for (auto &map_entry : roarings) { - if (r.roarings.count(map_entry.first) == 1) - map_entry.second -= r.roarings.at(map_entry.first); + Roaring64Map &operator-=(const Roaring64Map &other) { + if (this == &other) { + // Subtracting *this from itself results in the empty map. + roarings.clear(); + return *this; + } + + // Logic table summarizing what to do when a given outer key is + // present vs. absent from self and other. + // + // self other (self - other) work to do + // -------------------------------------------- + // absent absent empty None + // absent present empty None + // present absent unchanged None + // present present empty or not Subtract other from self, but + // erase self if result is empty + // + // Because there is only work to do when a key is present in both 'self' + // and 'other', the main while loop ping-pongs back and forth until it + // finds the next key that is the same on both sides. 
+ + auto self_iter = roarings.begin(); + auto other_iter = other.roarings.cbegin(); + + while (self_iter != roarings.end() && + other_iter != other.roarings.cend()) { + auto self_key = self_iter->first; + auto other_key = other_iter->first; + if (self_key < other_key) { + // Because self_key is < other_key, advance self_iter to the + // first point where self_key >= other_key (or end). + self_iter = roarings.lower_bound(other_key); + continue; + } + + if (self_key > other_key) { + // Because self_key is > other_key, advance other_iter to the + // first point where other_key >= self_key (or end). + other_iter = other.roarings.lower_bound(self_key); + continue; + } + + // Both sides have self_key. In the logic table above, this reflects + // the case (self.present & other.present). So subtract other from + // self. + auto &self_bitmap = self_iter->second; + const auto &other_bitmap = other_iter->second; + self_bitmap -= other_bitmap; + + if (self_bitmap.isEmpty()) { + // ...but if subtraction is empty, remove it altogether. + self_iter = roarings.erase(self_iter); + } else { + ++self_iter; + } + ++other_iter; } return *this; } /** - * Compute the union between the current bitmap and the provided bitmap, + * Compute the union of the current bitmap and the provided bitmap, * writing the result in the current bitmap. The provided bitmap is not * modified. * * See also the fastunion function to aggregate many bitmaps more quickly. */ - Roaring64Map &operator|=(const Roaring64Map &r) { - for (const auto &map_entry : r.roarings) { - if (roarings.count(map_entry.first) == 0) { - roarings[map_entry.first] = map_entry.second; - roarings[map_entry.first].setCopyOnWrite(copyOnWrite); - } else - roarings[map_entry.first] |= map_entry.second; + Roaring64Map &operator|=(const Roaring64Map &other) { + if (this == &other) { + // ORing *this with itself is a no-op. + return *this; + } + + // Logic table summarizing what to do when a given outer key is + // present vs. 
absent from self and other. + // + // self other (self | other) work to do + // -------------------------------------------- + // absent absent empty None + // absent present not empty Copy other to self and set flags + // present absent unchanged None + // present present not empty self |= other + // + // Because there is only work to do when a key is present in 'other', + // the main for loop iterates over entries in 'other'. + + for (const auto &other_entry : other.roarings) { + const auto &other_bitmap = other_entry.second; + + // Try to insert other_bitmap into self at other_key. We take + // advantage of the fact that std::map::insert will not overwrite an + // existing entry. + auto insert_result = roarings.insert(other_entry); + auto self_iter = insert_result.first; + auto insert_happened = insert_result.second; + auto &self_bitmap = self_iter->second; + + if (insert_happened) { + // Key was not present in self, so insert was performed above. + // In the logic table above, this reflects the case + // (self.absent | other.present). Because the copy has already + // happened, thanks to the 'insert' operation above, we just + // need to set the copyOnWrite flag. + self_bitmap.setCopyOnWrite(copyOnWrite); + continue; + } + + // Both sides have self_key, and the insert was not performed. In + // the logic table above, this reflects the case + // (self.present & other.present). So OR other into self. + self_bitmap |= other_bitmap; } return *this; } /** - * Compute the symmetric union between the current bitmap and the provided - * bitmap, writing the result in the current bitmap. The provided bitmap - * is not modified. + * Compute the XOR of the current bitmap and the provided bitmap, writing + * the result in the current bitmap. The provided bitmap is not modified. 
*/ - Roaring64Map &operator^=(const Roaring64Map &r) { - for (const auto &map_entry : r.roarings) { - if (roarings.count(map_entry.first) == 0) { - roarings[map_entry.first] = map_entry.second; - roarings[map_entry.first].setCopyOnWrite(copyOnWrite); - } else - roarings[map_entry.first] ^= map_entry.second; + Roaring64Map &operator^=(const Roaring64Map &other) { + if (this == &other) { + // XORing *this with itself results in the empty map. + roarings.clear(); + return *this; + } + + // Logic table summarizing what to do when a given outer key is + // present vs. absent from self and other. + // + // self other (self ^ other) work to do + // -------------------------------------------- + // absent absent empty None + // absent present non-empty Copy other to self and set flags + // present absent unchanged None + // present present empty or not XOR other into self, but erase self + // if result is empty. + // + // Because there is only work to do when a key is present in 'other', + // the main for loop iterates over entries in 'other'. + + for (const auto &other_entry : other.roarings) { + const auto &other_bitmap = other_entry.second; + + // Try to insert other_bitmap into self at other_key. We take + // advantage of the fact that std::map::insert will not overwrite an + // existing entry. + auto insert_result = roarings.insert(other_entry); + auto self_iter = insert_result.first; + auto insert_happened = insert_result.second; + auto &self_bitmap = self_iter->second; + + if (insert_happened) { + // Key was not present in self, so insert was performed above. + // In the logic table above, this reflects the case + // (self.absent ^ other.present). Because the copy has already + // happened, thanks to the 'insert' operation above, we just + // need to set the copyOnWrite flag. + self_bitmap.setCopyOnWrite(copyOnWrite); + continue; + } + + // Both sides have self_key, and the insert was not performed. 
In + // the logic table above, this reflects the case + // (self.present ^ other.present). So XOR other into self. + self_bitmap ^= other_bitmap; + + if (self_bitmap.isEmpty()) { + // ...but if the XOR result is empty, remove it altogether. + roarings.erase(self_iter); + } } return *this; } diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp index 7fe65fadb..f01557456 100644 --- a/tests/cpp_unit.cpp +++ b/tests/cpp_unit.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -25,6 +26,8 @@ using roaring::Roaring; // the C++ wrapper class #include "roaring64map.hh" using roaring::Roaring64Map; // C++ class extended for 64-bit numbers +#include "roaring64map_checked.hh" + #include "test.h" static_assert(std::is_nothrow_move_constructible::value, @@ -828,6 +831,97 @@ DEFINE_TEST(test_cpp_remove_range_64) { } } +std::pair + make_two_big_roaring64_maps() { + // Insert a large number of pseudorandom numbers into two sets. + const uint32_t randomSeed = 0xdeadbeef; + const size_t numValues = 1000000; // 1 million + + doublechecked::Roaring64Map roaring1; + doublechecked::Roaring64Map roaring2; + + std::default_random_engine engine(randomSeed); + std::uniform_int_distribution rng; + + for (size_t i = 0; i < numValues; ++i) { + auto value = rng(engine); + auto choice = rng(engine) % 4; + switch (choice) { + case 0: { + // Value is added only to set 1. + roaring1.add(value); + break; + } + + case 1: { + // Value is added only to set 2. + roaring2.add(value); + break; + } + + case 2: { + // Value is added to both sets. + roaring1.add(value); + roaring2.add(value); + break; + } + + case 3: { + // Value is added to set 1, and a slightly different value + // is added to set 2. This makes it likely that they are in + // the same "outer" bin, but at a different "inner" position.
+ roaring1.add(value); + roaring2.add(value + 1); + break; + } + + default: + assert_true(false); + } + } + return std::make_pair(std::move(roaring1), std::move(roaring2)); +} + +DEFINE_TEST(test_cpp_union_64) { + auto two_maps = make_two_big_roaring64_maps(); + + auto &lhs = two_maps.first; + const auto &rhs = two_maps.second; + + lhs |= rhs; + assert_true(lhs.does_std_set_match_roaring()); +} + +DEFINE_TEST(test_cpp_intersect_64) { + auto two_maps = make_two_big_roaring64_maps(); + + auto &lhs = two_maps.first; + const auto &rhs = two_maps.second; + + lhs &= rhs; + assert_true(lhs.does_std_set_match_roaring()); +} + +DEFINE_TEST(test_cpp_difference_64) { + auto two_maps = make_two_big_roaring64_maps(); + + auto &lhs = two_maps.first; + const auto &rhs = two_maps.second; + + lhs -= rhs; + assert_true(lhs.does_std_set_match_roaring()); +} + +DEFINE_TEST(test_cpp_xor_64) { + auto two_maps = make_two_big_roaring64_maps(); + + auto &lhs = two_maps.first; + const auto &rhs = two_maps.second; + + lhs ^= rhs; + assert_true(lhs.does_std_set_match_roaring()); +} + DEFINE_TEST(test_cpp_clear_64) { Roaring64Map roaring; @@ -1218,6 +1312,10 @@ int main() { cmocka_unit_test(test_run_compression_cpp_64_false), cmocka_unit_test(test_run_compression_cpp_true), cmocka_unit_test(test_run_compression_cpp_false), + cmocka_unit_test(test_cpp_union_64), + cmocka_unit_test(test_cpp_intersect_64), + cmocka_unit_test(test_cpp_difference_64), + cmocka_unit_test(test_cpp_xor_64), cmocka_unit_test(test_cpp_clear_64), cmocka_unit_test(test_cpp_move_64), cmocka_unit_test(test_roaring64_iterate_multi_roaring), From df61baee67c8ac6393c39993d0138552992de0f5 Mon Sep 17 00:00:00 2001 From: Uku Raudvere Date: Tue, 8 Nov 2022 15:39:54 +0200 Subject: [PATCH 018/162] Use ISO 8601 UTC dates as amalgamation timestamps (#401) --- amalgamation.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/amalgamation.sh b/amalgamation.sh index 849f7327d..ed3e54000 100755 --- a/amalgamation.sh +++ 
b/amalgamation.sh @@ -5,7 +5,7 @@ ######################################################################## SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" -timestamp=$(date) # capture to label files with their generation time +timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ") # capture to label files with their generation time function newline { echo "" From d569c3cc5abd679860d6096db8c9a6bb87508b25 Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Tue, 8 Nov 2022 17:08:34 -0500 Subject: [PATCH 019/162] Improve remove-type operations (#398) * Improve remove-type operations * Respond to review feedback. * snake case --- cpp/roaring64map.hh | 161 ++++++++++++++++++++++++++++------ tests/cpp_unit.cpp | 89 ++++++++++++++++++- tests/roaring64map_checked.hh | 27 ++++-- 3 files changed, 244 insertions(+), 33 deletions(-) diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh index a769317cf..6dfe6ec35 100644 --- a/cpp/roaring64map.hh +++ b/cpp/roaring64map.hh @@ -182,31 +182,73 @@ public: } /** - * Remove value x + * Removes value x. + */ + void remove(uint32_t x) { + auto iter = roarings.begin(); + // Since x is a uint32_t, highbytes(x) == 0. The inner bitmap we are + // looking for, if it exists, will be at the first slot of 'roarings'. + if (iter == roarings.end() || iter->first != 0) { + return; + } + auto &bitmap = iter->second; + bitmap.remove(x); + eraseIfEmpty(iter); + } + + /** + * Removes value x. */ - void remove(uint32_t x) { roarings[0].remove(x); } void remove(uint64_t x) { - auto roaring_iter = roarings.find(highBytes(x)); - if (roaring_iter != roarings.cend()) - roaring_iter->second.remove(lowBytes(x)); + auto iter = roarings.find(highBytes(x)); + if (iter == roarings.end()) { + return; + } + auto &bitmap = iter->second; + bitmap.remove(lowBytes(x)); + eraseIfEmpty(iter); } /** - * Remove value x - * Returns true if a new value was removed, false if the value was not existing. 
+ * Removes value x + * Returns true if a new value was removed, false if the value was not + * present. */ bool removeChecked(uint32_t x) { - return roarings[0].removeChecked(x); + auto iter = roarings.begin(); + // Since x is a uint32_t, highbytes(x) == 0. The inner bitmap we are + // looking for, if it exists, will be at the first slot of 'roarings'. + if (iter == roarings.end() || iter->first != 0) { + return false; + } + auto &bitmap = iter->second; + if (!bitmap.removeChecked(x)) { + return false; + } + eraseIfEmpty(iter); + return true; } + + /** + * Remove value x + * Returns true if a new value was removed, false if the value was not + * present. + */ bool removeChecked(uint64_t x) { - auto roaring_iter = roarings.find(highBytes(x)); - if (roaring_iter != roarings.cend()) - return roaring_iter->second.removeChecked(lowBytes(x)); - return false; + auto iter = roarings.find(highBytes(x)); + if (iter == roarings.end()) { + return false; + } + auto &bitmap = iter->second; + if (!bitmap.removeChecked(lowBytes(x))) { + return false; + } + eraseIfEmpty(iter); + return true; } /** - * Remove all values in range [min, max) + * Removes all values in the half-open interval [min, max). */ void removeRange(uint64_t min, uint64_t max) { if (min >= max) { @@ -216,11 +258,24 @@ public: } /** - * Remove all values in range [min, max] + * Removes all values in the closed interval [min, max]. */ void removeRangeClosed(uint32_t min, uint32_t max) { - return roarings[0].removeRangeClosed(min, max); + auto iter = roarings.begin(); + // Since min and max are uint32_t, highbytes(min or max) == 0. The inner + // bitmap we are looking for, if it exists, will be at the first slot of + // 'roarings'. + if (iter == roarings.end() || iter->first != 0) { + return; + } + auto &bitmap = iter->second; + bitmap.removeRangeClosed(min, max); + eraseIfEmpty(iter); } + + /** + * Removes all values in the closed interval [min, max]. 
+ */ void removeRangeClosed(uint64_t min, uint64_t max) { if (min > max) { return; @@ -230,35 +285,75 @@ public: uint32_t end_high = highBytes(max); uint32_t end_low = lowBytes(max); + // We put std::numeric_limits<>::max in parentheses to avoid a + // clash with the Windows.h header under Windows. + const uint32_t uint32_max = (std::numeric_limits::max)(); + + // If the outer map is empty, end_high is less than the first key, + // or start_high is greater than the last key, then exit now because + // there is no work to do. if (roarings.empty() || end_high < roarings.cbegin()->first || start_high > (roarings.crbegin())->first) { return; } + // If we get here, start_iter points to the first entry in the outer map + // with key >= start_high. Such an entry is known to exist (i.e. the + // iterator will not be equal to end()) because start_high <= the last + // key in the map (thanks to the above if statement). auto start_iter = roarings.lower_bound(start_high); + // end_iter points to the first entry in the outer map with + // key >= end_high, if such a key exists. Otherwise, it equals end(). auto end_iter = roarings.lower_bound(end_high); + + // Note that the 'lower_bound' method will find the start and end slots, + // if they exist; otherwise it will find the next-higher slots. + // In the case where 'start' landed on an existing slot, we need to do a + // partial erase of that slot, and likewise for 'end'. But all the slots + // in between can be fully erased. More precisely: + // + // 1. If the start point falls on an existing entry, there are two + // subcases: + // a. if the end point falls on that same entry, remove the closed + // interval [start_low, end_low] from that entry and we are done. + // b. Otherwise, remove the closed interval [start_low, uint32_max] + // from that entry, advance start_iter, and fall through to step 2. + // 2. Completely erase all slots in the half-open interval + // [start_iter, end_iter) + // 3. 
If the end point falls on an existing entry, remove the closed + // interval [0, end_low] from it. + + // Step 1. If the start point falls on an existing entry... + if (start_iter->first == start_high) { + auto &start_inner = start_iter->second; + // 1a. if the end point falls on that same entry... + if (start_iter == end_iter) { + start_inner.removeRangeClosed(start_low, end_low); + eraseIfEmpty(start_iter); + return; + } + + // 1b. Otherwise, remove the closed range [start_low, uint32_max]... + start_inner.removeRangeClosed(start_low, uint32_max); + // Advance start_iter, but keep the old value so we can check the + // bitmap we just modified for emptiness and erase it if necessary. + auto temp = start_iter++; + eraseIfEmpty(temp); } + // 2. Completely erase all slots in the half-open interval... roarings.erase(start_iter, end_iter); - if (end_iter != roarings.cend() && end_iter->first == end_high) { - end_iter->second.removeRangeClosed( - (std::numeric_limits::min)(), end_low); + // 3. If the end point falls on an existing entry... + if (end_iter != roarings.end() && end_iter->first == end_high) { + auto &end_inner = end_iter->second; + end_inner.removeRangeClosed(0, end_low); + eraseIfEmpty(end_iter); } } /** - * Clear the bitmap + * Clears the bitmap. */ void clear() { roarings.clear(); @@ -1225,7 +1320,8 @@ public: const_iterator end() const; private: - std::map roarings{}; // The empty constructor silences warnings from pedantic static analyzers. + typedef std::map roarings_t; + roarings_t roarings{}; // The empty constructor silences warnings from pedantic static analyzers.
bool copyOnWrite{false}; static uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); } static uint32_t lowBytes(const uint64_t in) { return uint32_t(in); } @@ -1250,6 +1346,17 @@ private: roarings.emplace(key, std::move(value)); #endif } + + /** + * Erases the entry pointed to by 'iter' from the 'roarings' map. Warning: + * this invalidates 'iter'. + */ + void eraseIfEmpty(roarings_t::iterator iter) { + const auto &bitmap = iter->second; + if (bitmap.isEmpty()) { + roarings.erase(iter); + } + } }; /** @@ -1259,7 +1366,7 @@ class Roaring64MapSetBitForwardIterator { public: typedef std::forward_iterator_tag iterator_category; typedef uint64_t *pointer; - typedef uint64_t &reference_type; + typedef uint64_t &reference; typedef uint64_t value_type; typedef int64_t difference_type; typedef Roaring64MapSetBitForwardIterator type_of_iterator; diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp index f01557456..a2f7598d1 100644 --- a/tests/cpp_unit.cpp +++ b/tests/cpp_unit.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -229,6 +230,67 @@ void test_roaring64_iterate_multi_roaring(void) { assert_true(iterate_count == 2); } +namespace { +bool roaringEqual(const Roaring64Map &actual, + std::initializer_list expected) { + return expected.size() == actual.cardinality() && + std::equal(expected.begin(), expected.end(), actual.begin()); +} +} // namespace + +DEFINE_TEST(test_roaring64_remove_32) { + Roaring64Map roaring; + + // A specific test to make sure we don't get slots confused. + // Specifically, we make Roaring64Map with only one slot (namely slot 5) + // with values {100, 200, 300} in its inner bitmap. Then we do a 32-bit + // remove of 100 from slot 0. A correct implementation of 'remove' would + // be a no-op. 
+ const uint64_t b5 = uint64_t(5) << 32; + Roaring64Map r; + r.add(b5 + 100); + r.add(b5 + 200); + r.add(b5 + 300); + r.remove(uint32_t(100)); + + // No change + assert_true(roaringEqual(r, {b5 + 100, b5 + 200, b5 + 300})); +} + +DEFINE_TEST(test_roaring64_add_and_remove) { + Roaring64Map r; + + const uint64_t b5 = uint64_t(5) << 32; + + // 32-bit adds + r.add(300u); + r.add(200u); + r.add(100u); + assert_true(roaringEqual(r, {100, 200, 300})); + + // 64-bit adds + r.add(uint64_t(200)); // Duplicate + r.add(uint64_t(400)); // New + r.add(b5 + 400); // All new + r.add(b5 + 300); + r.add(b5 + 200); + r.add(b5 + 100); + assert_true(roaringEqual(r, + {100, 200, 300, 400, b5 + 100, b5 + 200, b5 + 300, b5 + 400})); + + // 32-bit removes + r.remove(200u); // Exists. + r.remove(500u); // Doesn't exist + assert_true(roaringEqual(r, + {100, 300, 400, b5 + 100, b5 + 200, b5 + 300, b5 + 400})); + + // 64-bit removes + r.remove(b5 + 100); // Exists. + r.remove(b5 + 500); // Doesn't exist + assert_true(roaringEqual(r, + {100, 300, 400, b5 + 200, b5 + 300, b5 + 400})); +} + DEFINE_TEST(test_roaring64_iterate_multi_roaring) { test_roaring64_iterate_multi_roaring(); } @@ -736,7 +798,7 @@ DEFINE_TEST(test_cpp_add_range_64) { } } -DEFINE_TEST(test_cpp_remove_range_64) { +DEFINE_TEST(test_cpp_remove_range_closed_64) { { // 32-bit integers Roaring64Map r1 = @@ -831,6 +893,28 @@ DEFINE_TEST(test_cpp_remove_range_64) { } } +DEFINE_TEST(test_cpp_remove_range_64) { + // Because removeRange delegates to removeRangeClosed, we do most of the + // unit testing in test_cpp_remove_range_closed_64(). We just do a couple of + // sanity checks here. 
+ Roaring64Map r1; + auto b5 = uint64_t(5) << 32; + + auto uint64_max = std::numeric_limits::max(); + + r1.add(0u); // 32-bit add + r1.add(b5 + 1000); // arbitrary 64 bit add + r1.add(b5 + 1001); // arbitrary 64 bit add + r1.add(uint64_max - 1000); + r1.add(uint64_max); // highest possible bit + + // Half-open interval: result should be the set {0, maxUint64} + r1.removeRange(1, uint64_max); + + Roaring64Map r2 = Roaring64Map::bitmapOf(2, uint64_t(0), uint64_max); + assert_true(r1 == r2); +} + std::pair make_two_big_roaring64_maps() { // Insert a large number of pseudorandom numbers into two sets. @@ -1307,6 +1391,7 @@ int main() { cmocka_unit_test(test_cpp_add_range), cmocka_unit_test(test_cpp_remove_range), cmocka_unit_test(test_cpp_add_range_64), + cmocka_unit_test(test_cpp_remove_range_closed_64), cmocka_unit_test(test_cpp_remove_range_64), cmocka_unit_test(test_run_compression_cpp_64_true), cmocka_unit_test(test_run_compression_cpp_64_false), @@ -1319,6 +1404,8 @@ int main() { cmocka_unit_test(test_cpp_clear_64), cmocka_unit_test(test_cpp_move_64), cmocka_unit_test(test_roaring64_iterate_multi_roaring), + cmocka_unit_test(test_roaring64_remove_32), + cmocka_unit_test(test_roaring64_add_and_remove), cmocka_unit_test(test_cpp_bidirectional_iterator_64), cmocka_unit_test(test_cpp_frozen), cmocka_unit_test(test_cpp_frozen_64), diff --git a/tests/roaring64map_checked.hh b/tests/roaring64map_checked.hh index 8455d3e9b..2a3c97cb7 100644 --- a/tests/roaring64map_checked.hh +++ b/tests/roaring64map_checked.hh @@ -172,22 +172,39 @@ class Roaring64Map { return ans; } - void removeRange(const uint64_t x, const uint64_t y) { - if (x != y) { // repeat remove_range_closed() cast and bounding logic - removeRangeClosed(x, y - 1); + void removeRange(const uint64_t min, const uint64_t max) { + plain.removeRange(min, max); + if (min < max) { + // Points to the first entry with key >= min, or end + auto start = check.lower_bound(min); + // Points to the first entry with key >= 
max, or end. + auto end = check.lower_bound(max); + // Removes the half-open interval [start, end) (i.e. does not include max). + check.erase(start, end); } } void removeRangeClosed(uint32_t min, uint32_t max) { plain.removeRangeClosed(min, max); if (min <= max) { - check.erase(check.lower_bound(min), check.upper_bound(max)); + // Points to the first entry with key >= min, or end + auto start = check.lower_bound(min); + // Points to the first entry with key > max, or end. + auto end = check.upper_bound(max); + // Removes the half-open interval [start, end) (i.e. includes max). + check.erase(start, end); } } + void removeRangeClosed(uint64_t min, uint64_t max) { plain.removeRangeClosed(min, max); if (min <= max) { - check.erase(check.lower_bound(min), check.upper_bound(max)); + // Points to the first entry with key >= min, or end + auto start = check.lower_bound(min); + // Points to the first entry with key > max, or end. + auto end = check.upper_bound(max); + // Removes the half-open interval [start, end) (i.e. includes max). 
+ check.erase(start, end); } } From cea238c86c1334c1dce99f95dc070612cf63815e Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Tue, 8 Nov 2022 19:46:01 -0500 Subject: [PATCH 020/162] Improve the code readability of Roaring64Map::printf() and Roaring64Map::toString() (#399) --- cpp/roaring64map.hh | 120 ++++++++++++++++---------------------------- tests/cpp_unit.cpp | 38 ++++++++++++++ 2 files changed, 81 insertions(+), 77 deletions(-) diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh index 6dfe6ec35..0af6f35f7 100644 --- a/cpp/roaring64map.hh +++ b/cpp/roaring64map.hh @@ -5,10 +5,12 @@ A C++ header for 64-bit Roaring Bitmaps, implemented by way of a map of many #ifndef INCLUDE_ROARING_64_MAP_HH_ #define INCLUDE_ROARING_64_MAP_HH_ +#include #include #include // for va_list handling in bitmapOf() #include // for std::printf() in the printf() method #include // for std::memcpy() +#include #include #include #include @@ -1193,90 +1195,27 @@ public: } /** - * Print the content of the bitmap + * Print the contents of the bitmap to stdout. + * Note: this method adds a final newline, but toString() does not. 
*/ void printf() const { - if (!isEmpty()) { - auto map_iter = roarings.cbegin(); - while (map_iter->second.isEmpty()) ++map_iter; - struct iter_data { - uint32_t high_bits{}; - char first_char{'{'}; - } outer_iter_data; - outer_iter_data.high_bits = roarings.begin()->first; - map_iter->second.iterate( - [](uint32_t low_bits, void *inner_iter_data) -> bool { - std::printf("%c%llu", - ((iter_data *)inner_iter_data)->first_char, - (long long unsigned)uniteBytes( - ((iter_data *)inner_iter_data)->high_bits, - low_bits)); - ((iter_data *)inner_iter_data)->first_char = ','; - return true; - }, - (void *)&outer_iter_data); - std::for_each( - ++map_iter, roarings.cend(), - [](const std::pair &map_entry) { - map_entry.second.iterate( - [](uint32_t low_bits, void *high_bits) -> bool { - std::printf(",%llu", - (long long unsigned)uniteBytes( - *(uint32_t *)high_bits, low_bits)); - return true; - }, - (void *)&map_entry.first); - }); - } else - std::printf("{"); - std::printf("}\n"); + auto sink = [](const std::string &s) { + fputs(s.c_str(), stdout); + }; + printToSink(sink); + sink("\n"); } /** - * Print the content of the bitmap into a string + * Print the contents of the bitmap into a string. */ std::string toString() const { - struct iter_data { - std::string str{}; // The empty constructor silences warnings from pedantic static analyzers. 
- uint32_t high_bits{0}; - char first_char{'{'}; - } outer_iter_data; - if (!isEmpty()) { - auto map_iter = roarings.cbegin(); - while (map_iter->second.isEmpty()) ++map_iter; - outer_iter_data.high_bits = roarings.begin()->first; - map_iter->second.iterate( - [](uint32_t low_bits, void *inner_iter_data) -> bool { - ((iter_data *)inner_iter_data)->str += - ((iter_data *)inner_iter_data)->first_char; - ((iter_data *)inner_iter_data)->str += std::to_string( - uniteBytes(((iter_data *)inner_iter_data)->high_bits, - low_bits)); - ((iter_data *)inner_iter_data)->first_char = ','; - return true; - }, - (void *)&outer_iter_data); - std::for_each( - ++map_iter, roarings.cend(), - [&outer_iter_data]( - const std::pair &map_entry) { - outer_iter_data.high_bits = map_entry.first; - map_entry.second.iterate( - [](uint32_t low_bits, void *inner_iter_data) -> bool { - ((iter_data *)inner_iter_data)->str += - ((iter_data *)inner_iter_data)->first_char; - ((iter_data *)inner_iter_data)->str += - std::to_string(uniteBytes( - ((iter_data *)inner_iter_data)->high_bits, - low_bits)); - return true; - }, - (void *)&outer_iter_data); - }); - } else - outer_iter_data.str = '{'; - outer_iter_data.str += '}'; - return outer_iter_data.str; + std::string result; + auto sink = [&result](const std::string &s) { + result += s; + }; + printToSink(sink); + return result; } /** @@ -1347,6 +1286,33 @@ private: #endif } + /** + * Prints the contents of the bitmap to a caller-provided sink function. + */ + void printToSink(const std::function &sink) const { + sink("{"); + + // Storage for snprintf. Big enough to store the decimal representation + // of the largest uint64_t value and trailing \0. + char buffer[32]; + const char *separator = ""; + // Reusable, and therefore avoids many repeated heap allocations. 
+ std::string callback_string; + for (const auto &entry : roarings) { + auto high_bits = entry.first; + const auto &bitmap = entry.second; + for (const auto low_bits : bitmap) { + auto value = uniteBytes(high_bits, low_bits); + snprintf(buffer, sizeof(buffer), "%" PRIu64, value); + callback_string = separator; + callback_string.append(buffer); + sink(callback_string); + separator = ","; + } + } + sink("}"); + } + /** * Erases the entry pointed to by 'iter' from the 'roarings' map. Warning: * this invalidates 'iter'. diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp index a2f7598d1..7ff47a3ce 100644 --- a/tests/cpp_unit.cpp +++ b/tests/cpp_unit.cpp @@ -1307,6 +1307,43 @@ DEFINE_TEST(test_cpp_is_subset_64) { assert_true(r3.isSubset(r2)); } +DEFINE_TEST(test_cpp_to_string) { + // test toString + const auto b5 = uint64_t(5) << 32; + const auto uint32_max = std::numeric_limits::max(); + const auto uint64_max = std::numeric_limits::max(); + + { + // 32-bit test. + Roaring a; + assert_string_equal("{}", a.toString().c_str()); + + a.add(1); + assert_string_equal("{1}", a.toString().c_str()); + + a.add(2); + a.add(3); + a.add(uint32_max); + assert_string_equal("{1,2,3,4294967295}", a.toString().c_str()); + } + + { + // 64-bit test. 
+ Roaring64Map r; + assert_string_equal("{}", r.toString().c_str()); + + r.add(b5 + 100); + assert_string_equal("{21474836580}", r.toString().c_str()); + + r.add(1u); + r.add(2u); + r.add(uint32_max); + r.add(uint64_max); + assert_string_equal("{1,2,4294967295,21474836580,18446744073709551615}", + r.toString().c_str()); + } +} + DEFINE_TEST(test_cpp_remove_run_compression) { Roaring r; uint32_t max = (std::numeric_limits::max)(); @@ -1426,6 +1463,7 @@ int main() { cmocka_unit_test(issue_336), cmocka_unit_test(issue_372), cmocka_unit_test(test_cpp_is_subset_64), + cmocka_unit_test(test_cpp_to_string), cmocka_unit_test(test_cpp_remove_run_compression), }; return cmocka_run_group_tests(tests, NULL, NULL); From 4dbe48f5437c854f13ab82860e9b7331e13b861b Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Thu, 10 Nov 2022 11:24:03 -0500 Subject: [PATCH 021/162] RoaringMap64::select(): remove illegal pointer cast; throw exception on unreachable code path. (#400) * RoaringMap64::select(): remove illegal pointer cast; throw exception on unreachable code path. * Respond to review feedback: ROARING_TERMINATE * Respond to review feedback --- cpp/roaring64map.hh | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh index 0af6f35f7..a1786eaf9 100644 --- a/cpp/roaring64map.hh +++ b/cpp/roaring64map.hh @@ -898,20 +898,29 @@ public: } /** - * If the size of the roaring bitmap is strictly greater than rank, then - * this function returns true and set element to the element of given - * rank. Otherwise, it returns false. + * Selects the value at index 'rank' in the bitmap, where the smallest value + * is at index 0. If 'rank' < cardinality(), returns true with *element set + * to the element of the specified rank. Otherwise, returns false and the + * contents of *element are unspecified. 
*/ - bool select(uint64_t rnk, uint64_t *element) const { + bool select(uint64_t rank, uint64_t *element) const { for (const auto &map_entry : roarings) { - uint64_t sub_cardinality = (uint64_t)map_entry.second.cardinality(); - if (rnk < sub_cardinality) { - *element = ((uint64_t)map_entry.first) << 32; - // assuming little endian - return map_entry.second.select((uint32_t)rnk, - ((uint32_t *)element)); + auto key = map_entry.first; + const auto &bitmap = map_entry.second; + + uint64_t sub_cardinality = bitmap.cardinality(); + if (rank < sub_cardinality) { + uint32_t low_bytes; + // Casting rank to uint32_t is safe because + // rank < sub_cardinality and sub_cardinality <= 2^32. + if (!bitmap.select((uint32_t)rank, &low_bytes)) { + ROARING_TERMINATE("Logic error: bitmap.select() " + "returned false despite rank < cardinality()"); + } + *element = uniteBytes(key, low_bytes); + return true; } - rnk -= sub_cardinality; + rank -= sub_cardinality; } return false; } From aad6512b71ad9803b8bd261bb2a781af93f1dd15 Mon Sep 17 00:00:00 2001 From: Ole Sasse Date: Fri, 11 Nov 2022 15:29:21 +0100 Subject: [PATCH 022/162] Fix a bug in containsRange that triggered an assert (#404) --- src/roaring.c | 3 +-- tests/cpp_unit.cpp | 10 ++++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/roaring.c b/src/roaring.c index cc717bb29..7479b4720 100644 --- a/src/roaring.c +++ b/src/roaring.c @@ -2880,7 +2880,7 @@ bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_sta int32_t is = ra_get_index(&r->high_low_container, hb_rs); int32_t ie = ra_get_index(&r->high_low_container, hb_re); ie = (ie < 0 ? 
-ie - 1 : ie); - if ((is < 0) || ((ie - is) != span)) { + if ((is < 0) || ((ie - is) != span) || ie >= hlc_sz) { return false; } const uint32_t lb_rs = range_start & 0xFFFF; @@ -2894,7 +2894,6 @@ bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_sta if (!container_contains_range(c, lb_rs, 1 << 16, type)) { return false; } - assert(ie < hlc_sz); // would indicate an algorithmic bug c = ra_get_container_at_index(&r->high_low_container, ie, &type); if (!container_contains_range(c, 0, lb_re, type)) { return false; diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp index 7ff47a3ce..4f4bf4a5f 100644 --- a/tests/cpp_unit.cpp +++ b/tests/cpp_unit.cpp @@ -1413,6 +1413,15 @@ DEFINE_TEST(test_cpp_deserialize_64_key_too_small) { } #endif +DEFINE_TEST(test_cpp_contains_range_interleaved_containers) { + Roaring roaring; + // Range from last position in first container up to second position in 3rd container. + roaring.addRange(0xFFFF, 0x1FFFF + 2); + // Query from last position in 2nd container up to second position in 4th container. + // There is no 4th container in the bitmap. 
+ roaring.containsRange(0x1FFFF, 0x2FFFF + 2); +} + int main() { roaring::misc::tellmeall(); const struct CMUnitTest tests[] = { @@ -1465,6 +1474,7 @@ int main() { cmocka_unit_test(test_cpp_is_subset_64), cmocka_unit_test(test_cpp_to_string), cmocka_unit_test(test_cpp_remove_run_compression), + cmocka_unit_test(test_cpp_contains_range_interleaved_containers), }; return cmocka_run_group_tests(tests, NULL, NULL); } From c36081337974f3784e4c4cfecc55ad5c3be80966 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 11 Nov 2022 09:35:55 -0500 Subject: [PATCH 023/162] Preparing new release --- CMakeLists.txt | 4 ++-- include/roaring/roaring_version.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ecd15f912..58ed46dc5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,8 +18,8 @@ endif() set(ROARING_LIB_NAME roaring) set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 7) -set(PROJECT_VERSION_PATCH 2) -set(ROARING_LIB_VERSION "0.7.2" CACHE STRING "Roaring library version") +set(PROJECT_VERSION_PATCH 3) +set(ROARING_LIB_VERSION "0.7.3" CACHE STRING "Roaring library version") set(ROARING_LIB_SOVERSION "5" CACHE STRING "Roaring library soversion") option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON) diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h index 12f856758..128727194 100644 --- a/include/roaring/roaring_version.h +++ b/include/roaring/roaring_version.h @@ -1,10 +1,10 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "0.7.2" +#define ROARING_VERSION "0.7.3" enum { ROARING_VERSION_MAJOR = 0, ROARING_VERSION_MINOR = 7, - ROARING_VERSION_REVISION = 2 + ROARING_VERSION_REVISION = 3 }; #endif // ROARING_INCLUDE_ROARING_VERSION From b2bbd46c9f1e415e0d97a2796b6f11b1a2bb7503 Mon Sep 17 00:00:00 2001 From: 
Daniel Lemire Date: Tue, 15 Nov 2022 09:01:29 -0500 Subject: [PATCH 024/162] Update roaring64map.hh --- cpp/roaring64map.hh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh index a1786eaf9..1654aef53 100644 --- a/cpp/roaring64map.hh +++ b/cpp/roaring64map.hh @@ -1,6 +1,10 @@ -/* -A C++ header for 64-bit Roaring Bitmaps, implemented by way of a map of many -32-bit Roaring Bitmaps. +/** + * A C++ header for 64-bit Roaring Bitmaps, + * implemented by way of a map of many + * 32-bit Roaring Bitmaps. + * + * Reference (format specification) : + * https://github.com/RoaringBitmap/RoaringFormatSpec#extention-for-64-bit-implementations */ #ifndef INCLUDE_ROARING_64_MAP_HH_ #define INCLUDE_ROARING_64_MAP_HH_ From ff8aca1087e35ff8f46ff4d22e0dd657227ca944 Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Tue, 15 Nov 2022 18:23:56 -0500 Subject: [PATCH 025/162] Update flip-type operations of Roaring64Map (#402) * Update flip-type operations of Roaring64Map * Create private helper method 'ensureRangePopulated' * typo * Respond to review feedback. * If the caller invokes (half-open interval) flip() with a range that falls completely into slot 0, delegate to 32-bit flipClosed() rather than 64-bit flipClosed(). * Revert "If the caller invokes (half-open interval) flip() with a range that falls" This reverts commit 7662b3a79ae83b14dd7f95ec6ecc0a1466a619cb. * typo --- cpp/roaring.hh | 16 ++- cpp/roaring64map.hh | 156 ++++++++++++++++++---- tests/cpp_unit.cpp | 313 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 428 insertions(+), 57 deletions(-) diff --git a/cpp/roaring.hh b/cpp/roaring.hh index 3853ae7b6..c193691ba 100644 --- a/cpp/roaring.hh +++ b/cpp/roaring.hh @@ -99,7 +99,7 @@ public: } /** - * Construct a bitmap from a list of integer values. + * Construct a bitmap from a list of uint32_t values. */ static Roaring bitmapOf(size_t n, ...) 
{ Roaring ans; @@ -345,14 +345,22 @@ public: } /** - * Compute the negation of the roaring bitmap within a specified interval. - * interval: [range_start, range_end). - * Areas outside the range are passed through unchanged. + * Compute the negation of the roaring bitmap within the half-open interval + * [range_start, range_end). Areas outside the interval are unchanged. */ void flip(uint64_t range_start, uint64_t range_end) { api::roaring_bitmap_flip_inplace(&roaring, range_start, range_end); } + /** + * Compute the negation of the roaring bitmap within the closed interval + * [range_start, range_end]. Areas outside the interval are unchanged. + */ + void flipClosed(uint32_t range_start, uint32_t range_end) { + api::roaring_bitmap_flip_inplace( + &roaring, range_start, uint64_t(range_end) + 1); + } + /** * Remove run-length encoding even when it is more space efficient. * Return whether a change was applied. diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh index 1654aef53..d9e4ec464 100644 --- a/cpp/roaring64map.hh +++ b/cpp/roaring64map.hh @@ -85,7 +85,7 @@ public: Roaring64Map &operator=(Roaring64Map &&r) noexcept = default; /** - * Construct a bitmap from a list of integer values. + * Construct a bitmap from a list of uint64_t values. */ static Roaring64Map bitmapOf(size_t n...) { Roaring64Map ans; @@ -798,39 +798,98 @@ public: } /** - * Compute the negation of the roaring bitmap within a specified interval. - * areas outside the range are passed through unchanged. + * Computes the negation of the roaring bitmap within the half-open interval + * [min, max). Areas outside the interval are unchanged. */ - void flip(uint64_t range_start, uint64_t range_end) { - if (range_start >= range_end) { + void flip(uint64_t min, uint64_t max) { + if (min >= max) { + return; + } + flipClosed(min, max - 1); + } + + /** + * Computes the negation of the roaring bitmap within the closed interval + * [min, max]. Areas outside the interval are unchanged. 
+ */ + void flipClosed(uint32_t min, uint32_t max) { + auto iter = roarings.begin(); + // Since min and max are uint32_t, highbytes(min or max) == 0. The inner + // bitmap we are looking for, if it exists, will be at the first slot of + // 'roarings'. If it does not exist, we have to create it. + if (iter == roarings.end() || iter->first != 0) { + iter = roarings.emplace_hint(iter, std::piecewise_construct, + std::forward_as_tuple(0), + std::forward_as_tuple()); + auto &bitmap = iter->second; + bitmap.setCopyOnWrite(copyOnWrite); + } + auto &bitmap = iter->second; + bitmap.flipClosed(min, max); + eraseIfEmpty(iter); + } + + /** + * Computes the negation of the roaring bitmap within the closed interval + * [min, max]. Areas outside the interval are unchanged. + */ + void flipClosed(uint64_t min, uint64_t max) { + if (min > max) { return; } - uint32_t start_high = highBytes(range_start); - uint32_t start_low = lowBytes(range_start); - uint32_t end_high = highBytes(range_end); - uint32_t end_low = lowBytes(range_end); + uint32_t start_high = highBytes(min); + uint32_t start_low = lowBytes(min); + uint32_t end_high = highBytes(max); + uint32_t end_low = lowBytes(max); + // We put std::numeric_limits<>::max in parentheses to avoid a + // clash with the Windows.h header under Windows. + const uint32_t uint32_max = (std::numeric_limits::max)(); + + // Fill in any nonexistent slots with empty Roarings. This simplifies + // the logic below, allowing it to simply iterate over the map between + // 'start_high' and 'end_high' in a linear fashion. + auto current_iter = ensureRangePopulated(start_high, end_high); + + // If start and end land on the same inner bitmap, then we can do the + // whole operation in one call. 
if (start_high == end_high) { - roarings[start_high].flip(start_low, end_low); + auto &bitmap = current_iter->second; + bitmap.flipClosed(start_low, end_low); + eraseIfEmpty(current_iter); return; } - // we put std::numeric_limits<>::max/min in parentheses - // to avoid a clash with the Windows.h header under Windows - // flip operates on the range [lower_bound, upper_bound) - const uint64_t max_upper_bound = - static_cast((std::numeric_limits::max)()) + 1; - roarings[start_high].flip(start_low, max_upper_bound); - roarings[start_high++].setCopyOnWrite(copyOnWrite); - - for (; start_high <= highBytes(range_end) - 1; ++start_high) { - roarings[start_high].flip((std::numeric_limits::min)(), - max_upper_bound); - roarings[start_high].setCopyOnWrite(copyOnWrite); + + // Because start and end don't land on the same inner bitmap, + // we need to do this in multiple steps: + // 1. Partially flip the first bitmap in the closed interval + // [start_low, uint32_max] + // 2. Flip intermediate bitmaps completely: [0, uint32_max] + // 3. Partially flip the last bitmap in the closed interval + // [0, end_low] + + auto num_intermediate_bitmaps = end_high - start_high - 1; + + // 1. Partially flip the first bitmap. + { + auto &bitmap = current_iter->second; + bitmap.flipClosed(start_low, uint32_max); + auto temp = current_iter++; + eraseIfEmpty(temp); } - roarings[start_high].flip((std::numeric_limits::min)(), - end_low); - roarings[start_high].setCopyOnWrite(copyOnWrite); + // 2. Flip intermediate bitmaps completely. + for (uint32_t i = 0; i != num_intermediate_bitmaps; ++i) { + auto &bitmap = current_iter->second; + bitmap.flipClosed(0, uint32_max); + auto temp = current_iter++; + eraseIfEmpty(temp); + } + + // 3. Partially flip the last bitmap. 
+ auto &bitmap = current_iter->second; + bitmap.flipClosed(0, end_low); + eraseIfEmpty(current_iter); } /** @@ -1336,6 +1395,53 @@ private: roarings.erase(iter); } } + + /** + * Ensure that every key in the closed interval [start_high, end_high] + * refers to a Roaring bitmap rather than being an empty slot. Inserts empty + * Roaring bitmaps if necessary. The interval must be valid and non-empty. + * Returns an iterator to the bitmap at start_high. + */ + roarings_t::iterator ensureRangePopulated(uint32_t start_high, + uint32_t end_high) { + if (start_high > end_high) { + ROARING_TERMINATE("Logic error: start_high > end_high"); + } + // next_populated_iter points to the first entry in the outer map with + // key >= start_high, or end(). + auto next_populated_iter = roarings.lower_bound(start_high); + + // Use uint64_t to avoid an infinite loop when end_high == uint32_max. + roarings_t::iterator start_iter{}; // Definitely assigned in loop. + for (uint64_t slot = start_high; slot <= end_high; ++slot) { + roarings_t::iterator slot_iter; + if (next_populated_iter != roarings.end() && + next_populated_iter->first == slot) { + // 'slot' index has caught up to next_populated_iter. + // Note it here and advance next_populated_iter. + slot_iter = next_populated_iter++; + } else { + // 'slot' index has not yet caught up to next_populated_iter. + // Make a fresh entry {key = 'slot', value = Roaring()}, insert + // it just prior to next_populated_iter, and set its copy + // on write flag. We take pains to use emplace_hint and + // piecewise_construct to minimize effort. + slot_iter = roarings.emplace_hint( + next_populated_iter, std::piecewise_construct, + std::forward_as_tuple(uint32_t(slot)), + std::forward_as_tuple()); + auto &bitmap = slot_iter->second; + bitmap.setCopyOnWrite(copyOnWrite); + } + + // Make a note of the iterator of the starting slot. It will be + // needed for the return value. 
+ if (slot == start_high) { + start_iter = slot_iter; + } + } + return start_iter; + } }; /** diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp index 4f4bf4a5f..057024fea 100644 --- a/tests/cpp_unit.cpp +++ b/tests/cpp_unit.cpp @@ -34,6 +34,14 @@ using roaring::Roaring64Map; // C++ class extended for 64-bit numbers static_assert(std::is_nothrow_move_constructible::value, "Expected Roaring to be no except move constructable"); + +namespace { +// We put std::numeric_limits<>::max in parentheses to avoid a +// clash with the Windows.h header under Windows. +const auto uint32_max = (std::numeric_limits::max)(); +const auto uint64_max = (std::numeric_limits::max)(); +} // namespace + bool roaring_iterator_sumall(uint32_t value, void *param) { *(uint32_t *)param += value; return true; // we always process all values @@ -900,8 +908,6 @@ DEFINE_TEST(test_cpp_remove_range_64) { Roaring64Map r1; auto b5 = uint64_t(5) << 32; - auto uint64_max = std::numeric_limits::max(); - r1.add(0u); // 32-bit add r1.add(b5 + 1000); // arbitrary 64 bit add r1.add(b5 + 1001); // arbitrary 64 bit add @@ -1072,7 +1078,7 @@ DEFINE_TEST(test_cpp_frozen) { Roaring r1; r1.add(0); - r1.add(UINT32_MAX); + r1.add(uint32_max); r1.add(1000); r1.add(2000); r1.add(100000); @@ -1153,7 +1159,7 @@ DEFINE_TEST(test_cpp_frozen_64) { Roaring64Map r1; r1.add((uint64_t)0); - r1.add((uint64_t)UINT32_MAX); + r1.add((uint64_t)uint32_max); r1.add((uint64_t)1000); r1.add((uint64_t)2000); r1.add((uint64_t)100000); @@ -1212,6 +1218,13 @@ DEFINE_TEST(test_cpp_frozen_64) { } DEFINE_TEST(test_cpp_flip) { + { + // flipping an empty map works as expected + Roaring r1; + r1.flip(2, 5); + Roaring r2 = Roaring::bitmapOf(3, 2, 3, 4); + assert_true(r1 == r2); + } { // nothing is affected outside of the given range Roaring r1 = Roaring::bitmapOf(3, 1, 3, 6); @@ -1235,11 +1248,8 @@ DEFINE_TEST(test_cpp_flip) { } { // uint32 max can be flipped - Roaring r1 = - Roaring::bitmapOf(1, (std::numeric_limits::max)()); - r1.flip( - 
(std::numeric_limits::max)(), - static_cast((std::numeric_limits::max)()) + 1); + Roaring r1 = Roaring::bitmapOf(1, uint32_max); + r1.flip(uint32_max, static_cast(uint32_max) + 1); assert_true(r1.isEmpty()); } { @@ -1251,32 +1261,221 @@ DEFINE_TEST(test_cpp_flip) { } } +DEFINE_TEST(test_cpp_flip_closed) { + { + // flipping an empty map works as expected + Roaring r1; + r1.flipClosed(2, 5); + Roaring r2 = Roaring::bitmapOf(4, 2, 3, 4, 5); + assert_true(r1 == r2); + } + { + // nothing is affected outside of the given range + Roaring r1 = Roaring::bitmapOf(3, 1, 3, 6); + r1.flipClosed(2, 4); + Roaring r2 = Roaring::bitmapOf(4, 1, 2, 4, 6); + assert_true(r1 == r2); + } + { + // given range can go outside of existing range + Roaring r1 = Roaring::bitmapOf(2, 1, 3); + r1.flipClosed(0, 4); + Roaring r2 = Roaring::bitmapOf(3, 0, 2, 4); + assert_true(r1 == r2); + } + { + // range end is inclusive + Roaring r1 = Roaring::bitmapOf(2, 1, 3); + r1.flipClosed(1, 2); + Roaring r2 = Roaring::bitmapOf(2, 2, 3); + assert_true(r1 == r2); + } + { + // uint32 max can be flipped + Roaring r1 = Roaring::bitmapOf(1, uint32_max); + r1.flipClosed(uint32_max, uint32_max); + assert_true(r1.isEmpty()); + } + { + // empty range does nothing + Roaring r1 = Roaring::bitmapOf(2, 2, 3); + Roaring r2 = r1; + r1.flipClosed(2, 1); + assert_true(r1 == r2); + } +} + + DEFINE_TEST(test_cpp_flip_64) { + { + // 32-bit test + { + // flipping an empty map works as expected + Roaring64Map r1; + r1.flip(2, 5); + auto r2 = Roaring64Map::bitmapOf( + 3, uint64_t(2), uint64_t(3), uint64_t(4)); + assert_true(r1 == r2); + } + { + // nothing is affected outside of the given range + auto r1 = Roaring64Map::bitmapOf( + 3, uint64_t(1), uint64_t(3), uint64_t(6)); + r1.flip(uint32_t(2), uint32_t(5)); + Roaring64Map r2 = Roaring64Map::bitmapOf( + 4, uint64_t(1), uint64_t(2), uint64_t(4), uint64_t(6)); + assert_true(r1 == r2); + } + { + // given range can go outside of existing range + auto r1 = Roaring64Map::bitmapOf(2, 
uint64_t(1), uint64_t(3)); + r1.flip(uint32_t(0), uint32_t(5)); + auto r2 = Roaring64Map::bitmapOf( + 3, uint64_t(0), uint64_t(2), uint64_t(4)); + assert_true(r1 == r2); + } + { + // range end is exclusive + auto r1 = Roaring64Map::bitmapOf(2, uint64_t(1), uint64_t(3)); + r1.flip(uint32_t(1), uint32_t(3)); + auto r2 = Roaring64Map::bitmapOf(2, uint64_t(2), uint64_t(3)); + assert_true(r1 == r2); + } + { + // uint32 max can be flipped + auto r1 = Roaring64Map::bitmapOf(1, uint64_t(uint32_max)); + r1.flip(uint32_max, uint64_t(uint32_max) + 1); + assert_true(r1.isEmpty()); + } + { + // empty range does nothing + auto r1 = Roaring64Map::bitmapOf(2, uint64_t(2), uint64_t(3)); + auto r2 = r1; + r1.flip(uint32_t(2), uint32_t(2)); + assert_true(r1 == r2); + } + } + + const auto b1 = uint64_t(1) << 32; + const auto b2 = uint64_t(2) << 32; + { // nothing is affected outside of the given range - Roaring64Map r1 = Roaring64Map::bitmapOf(3, (((uint64_t)1) << 32) - 3, ((uint64_t)1) << 32, - (((uint64_t)1) << 32) + 3); - r1.flip((((uint64_t)1) << 32) - 2, (((uint64_t)1) << 32) + 2); + Roaring64Map r1 = Roaring64Map::bitmapOf(3, b1 - 3, b1, b1 + 3); + r1.flip(b1 - 2, b1 + 2); Roaring64Map r2 = Roaring64Map::bitmapOf( - 5, (((uint64_t)1) << 32) - 3, (((uint64_t)1) << 32) - 2, (((uint64_t)1) << 32) - 1, - (((uint64_t)1) << 32) + 1, (((uint64_t)1) << 32) + 3); + 5, b1 - 3, b1 - 2, b1 - 1, b1 + 1, b1 + 3); assert_true(r1 == r2); } { // given range can go outside of existing range - Roaring64Map r1 = Roaring64Map::bitmapOf(2, (((uint64_t)1) << 32) - 2, ((uint64_t)1) << 32); - r1.flip((((uint64_t)1) << 32) - 3, (((uint64_t)1) << 32) + 2); + Roaring64Map r1 = Roaring64Map::bitmapOf(2, b1 - 2, b1); + r1.flip(b1 - 3, b1 + 2); Roaring64Map r2 = Roaring64Map::bitmapOf( - 3, (((uint64_t)1) << 32) - 3, (((uint64_t)1) << 32) - 1, (((uint64_t)1) << 32) + 1); + 3, b1 - 3, b1 - 1, b1 + 1); assert_true(r1 == r2); } { // range end is exclusive + Roaring64Map r1 = Roaring64Map::bitmapOf(2, b2 - 1, b2 
+ 2); + r1.flip(b2 - 1, b2 + 2); + Roaring64Map r2; + for (uint64_t i = b2; i <= b2 + 2; ++i) { + r2.add(i); + } + assert_true(r1 == r2); + } + { + // uint32 max can be flipped Roaring64Map r1 = - Roaring64Map::bitmapOf(2, (((uint64_t)2) << 32) - 1, (((uint64_t)2) << 32) + 2); - r1.flip((((uint64_t)2) << 32) - 1, (((uint64_t)2) << 32) + 2); + Roaring64Map::bitmapOf(1, static_cast(uint32_max)); + r1.flip(uint32_max, static_cast(uint32_max) + 1); + assert_true(r1.isEmpty()); + } + { + // empty range does nothing + Roaring64Map r1 = Roaring64Map::bitmapOf(2, b1 - 1, b1); + Roaring64Map r2 = r1; + r1.flip(b1 - 1, b1 - 1); + assert_true(r1 == r2); + } +} + +DEFINE_TEST(test_cpp_flip_closed_64) { + { + // 32-bit test + { + // flipping an empty map works as expected + Roaring64Map r1; + r1.flipClosed(uint32_t(2), uint32_t(5)); + auto r2 = Roaring64Map::bitmapOf( + 4, uint64_t(2), uint64_t(3), uint64_t(4), uint64_t(5)); + assert_true(r1 == r2); + } + { + // nothing is affected outside of the given range + auto r1 = Roaring64Map::bitmapOf( + 3, uint64_t(1), uint64_t(3), uint64_t(6)); + r1.flipClosed(uint32_t(2), uint32_t(4)); + Roaring64Map r2 = Roaring64Map::bitmapOf( + 4, uint64_t(1), uint64_t(2), uint64_t(4), uint64_t(6)); + assert_true(r1 == r2); + } + { + // given range can go outside of existing range + auto r1 = Roaring64Map::bitmapOf(2, uint64_t(1), uint64_t(3)); + r1.flipClosed(uint32_t(0), uint32_t(4)); + auto r2 = Roaring64Map::bitmapOf( + 3, uint64_t(0), uint64_t(2), uint64_t(4)); + assert_true(r1 == r2); + } + { + // range end is inclusive + auto r1 = Roaring64Map::bitmapOf(2, uint64_t(1), uint64_t(3)); + r1.flipClosed(uint32_t(1), uint32_t(2)); + auto r2 = Roaring64Map::bitmapOf(2, uint64_t(2), uint64_t(3)); + assert_true(r1 == r2); + } + { + // uint32 max can be flipped + auto r1 = Roaring64Map::bitmapOf(1, uint64_t(uint32_max)); + r1.flipClosed(uint32_max, uint32_max); + assert_true(r1.isEmpty()); + } + { + // empty range does nothing + auto r1 = 
Roaring64Map::bitmapOf(2, uint64_t(2), uint64_t(3)); + auto r2 = r1; + r1.flipClosed(uint32_t(2), uint32_t(1)); + assert_true(r1 == r2); + } + } + + const auto b1 = uint64_t(1) << 32; + const auto b2 = uint64_t(2) << 32; + + { + // nothing is affected outside of the given range + Roaring64Map r1 = Roaring64Map::bitmapOf(3, b1 - 3, b1, b1 + 3); + r1.flipClosed(b1 - 2, b1 + 1); + Roaring64Map r2 = Roaring64Map::bitmapOf( + 5, b1 - 3, b1 - 2, b1 - 1, b1 + 1, b1 + 3); + assert_true(r1 == r2); + } + { + // given range can go outside of existing range + Roaring64Map r1 = Roaring64Map::bitmapOf(2, b1 - 2, b1); + r1.flipClosed(b1 - 3, b1 + 1); + Roaring64Map r2 = Roaring64Map::bitmapOf( + 3, b1 - 3, b1 - 1, b1 + 1); + assert_true(r1 == r2); + } + { + // range end is inclusive + Roaring64Map r1 = Roaring64Map::bitmapOf(2, b2 - 1, b2 + 2); + r1.flipClosed(b2 - 1, b2 + 1); Roaring64Map r2; - for (uint64_t i = (((uint64_t)2) << 32); i <= (((uint64_t)2) << 32) + 2; ++i) { + for (uint64_t i = b2; i <= b2 + 2; ++i) { r2.add(i); } assert_true(r1 == r2); @@ -1284,21 +1483,78 @@ DEFINE_TEST(test_cpp_flip_64) { { // uint32 max can be flipped Roaring64Map r1 = - Roaring64Map::bitmapOf(1, static_cast((std::numeric_limits::max)())); - r1.flip( - (std::numeric_limits::max)(), - static_cast((std::numeric_limits::max)()) + 1); + Roaring64Map::bitmapOf(1, static_cast(uint32_max)); + r1.flipClosed(uint32_max, uint32_max); assert_true(r1.isEmpty()); } { // empty range does nothing - Roaring64Map r1 = Roaring64Map::bitmapOf(2, (((uint64_t)1) << 32) - 1, ((uint64_t)1) << 32); + Roaring64Map r1 = Roaring64Map::bitmapOf(2, b1 - 1, b1); Roaring64Map r2 = r1; - r1.flip((((uint64_t)1) << 32) - 1, (((uint64_t)1) << 32) - 1); + r1.flipClosed(b1 - 1, b1 - 2); assert_true(r1 == r2); } } +DEFINE_TEST(test_combinatoric_flip_many_64) { + // Given 'num_slots_to_test' outer slots, we repeatedly seed a Roaring64Map + // with all combinations of present and absent outer slots (basically the + // powerset of 
{0...num_slots_to_test - 1}), then we call flipClosed + // and see if the cardinality is what we expect. + // + // For example (assuming num_slots_to_test = 5), the iterations of the outer + // loop represent these sets: + // 1. {} + // 2. {0} + // 3. {1} + // 4. {0, 1} + // 5. {2} + // 6. {0, 2} + // 7. {1, 2} + // 8. {0, 1, 2} + // 9. {3} + // and so forth... + // + // For example, in step 6 (representing set {0, 2}) we set a bit somewhere + // in slot 0 and we set another bit somewhere in slot 2. The purpose of this + // is to make sure 'flipClosed' does the right thing when it encounters + // an arbitrary mix of present and absent slots. Then we call + // 'flipClosed' over the whole range and confirm that the cardinality + // is what we expect. + const uint32_t num_slots_to_test = 5; + const uint32_t base_slot = 50; + + const uint32_t bitmask_limit = 1 << num_slots_to_test; + + for (uint32_t bitmask = 0; bitmask < bitmask_limit; ++bitmask) { + Roaring64Map roaring; + uint32_t num_one_bits = 0; + + // The 1-bits in 'bitmask' indicate which slots we want to seed + // with a value. 
+ for (uint32_t bit_index = 0; bit_index < num_slots_to_test; ++bit_index) { + if ((bitmask & (1 << bit_index)) == 0) { + continue; + } + auto slot = base_slot + bit_index; + auto value = (uint64_t(slot) << 32) + 0x1234567 + bit_index; + roaring.add(value); + ++num_one_bits; + } + + auto first_bucket = uint64_t(base_slot) << 32; + auto last_bucket = uint64_t(base_slot + num_slots_to_test - 1) << 32; + + roaring.flipClosed(first_bucket, last_bucket + uint32_max); + + // Slots not initialized with a bit will now have cardinality 2^32 + // Slots initialized with a bit will have cardinality 2^32 - 1 + auto expected_cardinality = num_slots_to_test * (uint64_t(1) << 32) + - num_one_bits; + assert_int_equal(expected_cardinality, roaring.cardinality()); + } +} + DEFINE_TEST(test_cpp_is_subset_64) { Roaring64Map r1 = Roaring64Map::bitmapOf(1, uint64_t(1)); Roaring64Map r2 = Roaring64Map::bitmapOf(1, uint64_t(1) << 32); @@ -1310,8 +1566,6 @@ DEFINE_TEST(test_cpp_is_subset_64) { DEFINE_TEST(test_cpp_to_string) { // test toString const auto b5 = uint64_t(5) << 32; - const auto uint32_max = std::numeric_limits::max(); - const auto uint64_max = std::numeric_limits::max(); { // 32-bit test. @@ -1456,7 +1710,10 @@ int main() { cmocka_unit_test(test_cpp_frozen), cmocka_unit_test(test_cpp_frozen_64), cmocka_unit_test(test_cpp_flip), + cmocka_unit_test(test_cpp_flip_closed), cmocka_unit_test(test_cpp_flip_64), + cmocka_unit_test(test_cpp_flip_closed_64), + cmocka_unit_test(test_combinatoric_flip_many_64), cmocka_unit_test(test_cpp_deserialize_64_empty), cmocka_unit_test(test_cpp_deserialize_64_32bit_vals), cmocka_unit_test(test_cpp_deserialize_64_spread_vals), From cd5033b7c298a25fb961eb462a4d81229d8003b1 Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Tue, 15 Nov 2022 18:24:16 -0500 Subject: [PATCH 026/162] Improve add-type operations (#397) * Improve add-type operations * Rewrite in 'ensureRangePopulated' style. 
* Respond to review feedback * typo --- cpp/roaring64map.hh | 185 +++++++++++++++++++++++++++------- tests/cpp_unit.cpp | 169 ++++++++++++++++++++++++++++++- tests/roaring64map_checked.hh | 7 +- 3 files changed, 315 insertions(+), 46 deletions(-) diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh index d9e4ec464..76df3eda8 100644 --- a/cpp/roaring64map.hh +++ b/cpp/roaring64map.hh @@ -99,34 +99,39 @@ public: } /** - * Add value x + * Adds value x. */ void add(uint32_t x) { - roarings[0].add(x); - roarings[0].setCopyOnWrite(copyOnWrite); + lookupOrCreateInner(0).add(x); } + + /** + * Adds value x. + */ void add(uint64_t x) { - roarings[highBytes(x)].add(lowBytes(x)); - roarings[highBytes(x)].setCopyOnWrite(copyOnWrite); + lookupOrCreateInner(highBytes(x)).add(lowBytes(x)); } /** - * Add value x - * Returns true if a new value was added, false if the value was already existing. + * Adds value x. + * Returns true if a new value was added, false if the value was already + * present. */ bool addChecked(uint32_t x) { - bool result = roarings[0].addChecked(x); - roarings[0].setCopyOnWrite(copyOnWrite); - return result; + return lookupOrCreateInner(0).addChecked(x); } + + /** + * Adds value x. + * Returns true if a new value was added, false if the value was already + * present. + */ bool addChecked(uint64_t x) { - bool result = roarings[highBytes(x)].addChecked(lowBytes(x)); - roarings[highBytes(x)].setCopyOnWrite(copyOnWrite); - return result; + return lookupOrCreateInner(highBytes(x)).addChecked(lowBytes(x)); } /** - * Add all values in range [min, max) + * Adds all values in the half-open interval [min, max). */ void addRange(uint64_t min, uint64_t max) { if (min >= max) { @@ -136,11 +141,15 @@ public: } /** - * Add all values in range [min, max] + * Adds all values in the closed interval [min, max]. 
*/ void addRangeClosed(uint32_t min, uint32_t max) { - roarings[0].addRangeClosed(min, max); + lookupOrCreateInner(0).addRangeClosed(min, max); } + + /** + * Adds all values in the closed interval [min, max] + */ void addRangeClosed(uint64_t min, uint64_t max) { if (min > max) { return; @@ -149,41 +158,83 @@ public: uint32_t start_low = lowBytes(min); uint32_t end_high = highBytes(max); uint32_t end_low = lowBytes(max); + + // We put std::numeric_limits<>::max in parentheses to avoid a + // clash with the Windows.h header under Windows. + const uint32_t uint32_max = (std::numeric_limits::max)(); + + // Fill in any nonexistent slots with empty Roarings. This simplifies + // the logic below, allowing it to simply iterate over the map between + // 'start_high' and 'end_high' in a linear fashion. + auto current_iter = ensureRangePopulated(start_high, end_high); + + // If start and end land on the same inner bitmap, then we can do the + // whole operation in one call. if (start_high == end_high) { - roarings[start_high].addRangeClosed(start_low, end_low); - roarings[start_high].setCopyOnWrite(copyOnWrite); + auto &bitmap = current_iter->second; + bitmap.addRangeClosed(start_low, end_low); return; } - // we put std::numeric_limits<>::max/min in parenthesis to avoid a clash - // with the Windows.h header under Windows - roarings[start_high].addRangeClosed( - start_low, (std::numeric_limits::max)()); - roarings[start_high].setCopyOnWrite(copyOnWrite); - start_high++; - for (; start_high < end_high; ++start_high) { - roarings[start_high].addRangeClosed( - (std::numeric_limits::min)(), - (std::numeric_limits::max)()); - roarings[start_high].setCopyOnWrite(copyOnWrite); + + // Because start and end don't land on the same inner bitmap, + // we need to do this in multiple steps: + // 1. Partially fill the first bitmap with values from the closed + // interval [start_low, uint32_max] + // 2. Fill intermediate bitmaps completely: [0, uint32_max] + // 3. 
Partially fill the last bitmap with values from the closed + // interval [0, end_low] + auto num_intermediate_bitmaps = end_high - start_high - 1; + + // Step 1: Partially fill the first bitmap. + { + auto &bitmap = current_iter->second; + bitmap.addRangeClosed(start_low, uint32_max); + ++current_iter; + } + + // Step 2. Fill intermediate bitmaps completely. + if (num_intermediate_bitmaps != 0) { + auto &first_intermediate = current_iter->second; + first_intermediate.addRangeClosed(0, uint32_max); + ++current_iter; + + // Now make (num_intermediate_bitmaps - 1) copies of this. + for (uint32_t i = 1; i != num_intermediate_bitmaps; ++i) { + auto &next_intermediate = current_iter->second; + next_intermediate = first_intermediate; + ++current_iter; + } } - roarings[end_high].addRangeClosed( - (std::numeric_limits::min)(), end_low); - roarings[end_high].setCopyOnWrite(copyOnWrite); + + // Step 3: Partially fill the last bitmap. + auto &bitmap = current_iter->second; + bitmap.addRangeClosed(0, end_low); } /** - * Add value n_args from pointer vals + * Adds 'n_args' values from the contiguous memory range starting at 'vals'. */ void addMany(size_t n_args, const uint32_t *vals) { - Roaring &roaring = roarings[0]; - roaring.addMany(n_args, vals); - roaring.setCopyOnWrite(copyOnWrite); + lookupOrCreateInner(0).addMany(n_args, vals); } + /** + * Adds 'n_args' values from the contiguous memory range starting at 'vals'. + */ void addMany(size_t n_args, const uint64_t *vals) { + // Potentially reduce outer map lookups by optimistically + // assuming that adjacent values will belong to the same inner bitmap. 
+ Roaring *last_inner_bitmap = nullptr; + uint32_t last_value_high = 0; for (size_t lcv = 0; lcv < n_args; lcv++) { - roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv])); - roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite); + auto value = vals[lcv]; + auto value_high = highBytes(value); + auto value_low = lowBytes(value); + if (last_inner_bitmap == nullptr || value_high != last_value_high) { + last_inner_bitmap = &lookupOrCreateInner(value_high); + last_value_high = value_high; + } + last_inner_bitmap->add(value_low); } } @@ -1358,6 +1409,17 @@ private: #endif } + /* + * Look up 'key' in the 'roarings' map. If it does not exist, create it. + * Also, set its copyOnWrite flag to 'copyOnWrite'. Then return a reference + * to the (already existing or newly created) inner bitmap. + */ + Roaring &lookupOrCreateInner(uint32_t key) { + auto &bitmap = roarings[key]; + bitmap.setCopyOnWrite(copyOnWrite); + return bitmap; + } + /** * Prints the contents of the bitmap to a caller-provided sink function. */ @@ -1385,6 +1447,53 @@ private: sink("}"); } + /** + * Ensures that every key in the closed interval [start_high, end_high] + * refers to a Roaring bitmap rather than being an empty slot. Inserts empty + * Roaring bitmaps if necessary. The interval must be valid and non-empty. + * Returns an iterator to the bitmap at start_high. + */ + roarings_t::iterator ensureRangePopulated(uint32_t start_high, + uint32_t end_high) { + if (start_high > end_high) { + ROARING_TERMINATE("Logic error: start_high > end_high"); + } + // next_populated_iter points to the first entry in the outer map with + // key >= start_high, or end(). + auto next_populated_iter = roarings.lower_bound(start_high); + + // Use uint64_t to avoid an infinite loop when end_high == uint32_max. + roarings_t::iterator start_iter{}; // Definitely assigned in loop. 
+ for (uint64_t slot = start_high; slot <= end_high; ++slot) { + roarings_t::iterator slot_iter; + if (next_populated_iter != roarings.end() && + next_populated_iter->first == slot) { + // 'slot' index has caught up to next_populated_iter. + // Note it here and advance next_populated_iter. + slot_iter = next_populated_iter++; + } else { + // 'slot' index has not yet caught up to next_populated_iter. + // Make a fresh entry {key = 'slot', value = Roaring()}, insert + // it just prior to next_populated_iter, and set its copy + // on write flag. We take pains to use emplace_hint and + // piecewise_construct to minimize effort. + slot_iter = roarings.emplace_hint( + next_populated_iter, std::piecewise_construct, + std::forward_as_tuple(uint32_t(slot)), + std::forward_as_tuple()); + auto &bitmap = slot_iter->second; + bitmap.setCopyOnWrite(copyOnWrite); + } + + // Make a note of the iterator of the starting slot. It will be + // needed for the return value. + if (slot == start_high) { + start_iter = slot_iter; + } + } + return start_iter; + } + /** * Erases the entry pointed to by 'iter' from the 'roarings' map. Warning: * this invalidates 'iter'. 
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp index 057024fea..108855470 100644 --- a/tests/cpp_unit.cpp +++ b/tests/cpp_unit.cpp @@ -778,7 +778,7 @@ DEFINE_TEST(test_cpp_remove_range) { } } -DEFINE_TEST(test_cpp_add_range_64) { +DEFINE_TEST(test_cpp_add_range_closed_64) { { // 32-bit integers Roaring64Map r1; @@ -789,10 +789,12 @@ DEFINE_TEST(test_cpp_add_range_64) { } assert_true(r1 == r2); } + auto b1 = uint64_t(1) << 32; std::vector> ranges = { - {uint64_t(1) << 32, (uint64_t(1) << 32) + 10}, - {(uint64_t(1) << 32) - 10, (uint64_t(1) << 32) + 10}, - {(uint64_t(1) << 32) + 2, (uint64_t(1) << 32) - 2}}; + {b1, b1 + 10}, + {b1 + 100, b1 + 100}, // one element + {b1 - 10, b1 + 10}, + {b1 + 2, b1 - 2}}; for (const auto &range : ranges) { uint64_t min = range.first; uint64_t max = range.second; @@ -806,6 +808,157 @@ DEFINE_TEST(test_cpp_add_range_64) { } } +DEFINE_TEST(test_cpp_add_range_open_64) { + { + // 32-bit integers + Roaring64Map r1; + r1.addRange(uint32_t(1), uint32_t(5)); + Roaring64Map r2; + for (uint32_t v = 1; v < 5; ++v) { + r2.add(v); + } + assert_true(r1 == r2); + } + auto b1 = uint64_t(1) << 32; + std::vector> ranges = { + {b1, b1 + 10}, + {b1 - 10, b1 + 10}, + {b1 + 100, b1 + 100}, // empty + {b1 + 2, b1 - 2}}; + for (const auto &range : ranges) { + uint64_t min = range.first; + uint64_t max = range.second; + Roaring64Map r1; + r1.addRange(min, max); + Roaring64Map r2; + for (uint64_t v = min; v < max; ++v) { + r2.add(v); + } + assert_true(r1 == r2); + } +} + +DEFINE_TEST(test_cpp_add_range_closed_large_64) { + uint32_t start_high = 300; + for (uint32_t end_high = start_high; end_high != 305; ++end_high) { + auto begin = (uint64_t(start_high) << 32) + 0x01234567; + auto end = (uint64_t(end_high) << 32) + 0x89abcdef; + Roaring64Map r1; + r1.addRangeClosed(begin, end); + auto size = end - begin + 1; + assert_true(r1.cardinality() == size); + } +} + +DEFINE_TEST(test_cpp_add_range_open_large_64) { + uint32_t start_high = 300; + for (uint32_t 
end_high = start_high; end_high != 305; ++end_high) { + auto begin = (uint64_t(start_high) << 32) + 0x01234567; + auto end = (uint64_t(end_high) << 32) + 0x89abcdef; + Roaring64Map r1; + r1.addRange(begin, end); + auto size = end - begin; + assert_true(r1.cardinality() == size); + } +} + +DEFINE_TEST(test_cpp_add_many) { + std::vector values = { 9999, 123, 0xFFFFFFFF, 0xFFFFFFF7, 9999}; + Roaring r1; + r1.addMany(values.size(), values.data()); + Roaring r2; + for (const auto value : values) { + r2.add(value); + } + assert_true(r1 == r2); +} + +DEFINE_TEST(test_cpp_add_many_64) { + { + // 32-bit integers + std::vector values = { 9999, 123, 0xFFFFFFFF, 0xFFFFFFF7, 0, 9999}; + Roaring64Map r1; + r1.addMany(values.size(), values.data()); + Roaring64Map r2; + for (const auto value : values) { + r2.add(value); + } + assert_true(r1 == r2); + } + + auto b1 = uint64_t(1) << 32; + auto b555 = uint64_t(555) << 32; + + std::vector values = { + b555 + 9999, b1 + 123, b1 + 0xFFFFFFFF, b555 + 0xFFFFFFF7, 0, b555 + 9999}; + Roaring64Map r1; + r1.addMany(values.size(), values.data()); + Roaring64Map r2; + for (const auto value : values) { + r2.add(value); + } + assert_true(r1 == r2); +} + +DEFINE_TEST(test_cpp_add_range_closed_combinatoric_64) { + // Given 'num_slots_to_test' outer slots, we repeatedly seed a Roaring64Map + // with all combinations of present and absent outer slots (basically the + // powerset of {0...num_slots_to_test - 1}), then we add_range_closed + // and see if the cardinality is what we expect. + // + // For example (assuming num_slots_to_test = 5), the iterations of the outer + // loop represent these sets: + // 1. {} + // 2. {0} + // 3. {1} + // 4. {0, 1} + // 5. {2} + // 6. {0, 2} + // 7. {1, 2} + // 8. {0, 1, 2} + // 9. {3} + // and so forth... + // + // For example, in step 6 (representing set {0, 2}) we set a bit somewhere + // in slot 0 and we set another bit somewhere in slot 2. 
The purpose of this + // is to make sure 'addRangeClosed' does the right thing when it encounters + // an arbitrary mix of present and absent slots. Then we call + // 'addRangeClosed' over the whole range and confirm that the cardinality + // is what we expect. + const uint32_t num_slots_to_test = 5; + const uint32_t base_slot = 50; + + // We put std::numeric_limits<>::max in parentheses to avoid a + // clash with the Windows.h header under Windows. + const auto uint32_max = (std::numeric_limits::max)(); + + const uint32_t bitmask_limit = 1 << num_slots_to_test; + + for (uint32_t bitmask = 0; bitmask < bitmask_limit; ++bitmask) { + Roaring64Map roaring; + + // The 1-bits in 'bitmask' indicate which slots we want to seed + // with a value. + for (uint32_t bit_index = 0; bit_index < num_slots_to_test; ++bit_index) { + if ((bitmask & (1 << bit_index)) == 0) { + continue; + } + auto slot = base_slot + bit_index; + auto value = (uint64_t(slot) << 32) + bit_index; + roaring.add(value); + } + + auto first_bucket = uint64_t(base_slot) << 32; + auto last_bucket = uint64_t(base_slot + num_slots_to_test - 1) << 32; + + roaring.addRangeClosed(first_bucket, + last_bucket + uint32_max); + + auto expected_cardinality = num_slots_to_test * (uint64_t(1) << 32); + assert_int_equal(expected_cardinality, roaring.cardinality()); + } +} + DEFINE_TEST(test_cpp_remove_range_closed_64) { { // 32-bit integers @@ -1690,7 +1843,13 @@ int main() { cmocka_unit_test(test_cpp_add_remove_checked_64), cmocka_unit_test(test_cpp_add_range), cmocka_unit_test(test_cpp_remove_range), - cmocka_unit_test(test_cpp_add_range_64), + cmocka_unit_test(test_cpp_add_range_closed_64), + cmocka_unit_test(test_cpp_add_range_open_64), + cmocka_unit_test(test_cpp_add_range_closed_large_64), + cmocka_unit_test(test_cpp_add_range_open_large_64), + cmocka_unit_test(test_cpp_add_many), + cmocka_unit_test(test_cpp_add_many_64), + cmocka_unit_test(test_cpp_add_range_closed_combinatoric_64), 
cmocka_unit_test(test_cpp_remove_range_closed_64), cmocka_unit_test(test_cpp_remove_range_64), cmocka_unit_test(test_run_compression_cpp_64_true), diff --git a/tests/roaring64map_checked.hh b/tests/roaring64map_checked.hh index 2a3c97cb7..d31276fa9 100644 --- a/tests/roaring64map_checked.hh +++ b/tests/roaring64map_checked.hh @@ -116,9 +116,10 @@ class Roaring64Map { return ans; } - void addRange(const uint64_t x, const uint64_t y) { - if (x != y) { // repeat add_range_closed() cast and bounding logic - addRangeClosed(x, y - 1); + void addRange(const uint64_t min, const uint64_t max) { + plain.addRange(min, max); + for (uint64_t val = min; val < max; ++val) { + check.insert(val); } } From 5c924b60e4b2be06c96bd4905cd6dcdbc531ff6b Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Wed, 16 Nov 2022 08:50:42 -0500 Subject: [PATCH 027/162] Fix build: remove duplicate 'ensureRangePopulated()' (#411) --- cpp/roaring64map.hh | 47 --------------------------------------------- 1 file changed, 47 deletions(-) diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh index 76df3eda8..aaea82dc3 100644 --- a/cpp/roaring64map.hh +++ b/cpp/roaring64map.hh @@ -1504,53 +1504,6 @@ private: roarings.erase(iter); } } - - /** - * Ensure that every key in the closed interval [start_high, end_high] - * refers to a Roaring bitmap rather being an empty slot. Inserts empty - * Roaring bitmaps if necessary. The interval must be valid and non-empty. - * Returns an iterator to the bitmap at start_high. - */ - roarings_t::iterator ensureRangePopulated(uint32_t start_high, - uint32_t end_high) { - if (start_high > end_high) { - ROARING_TERMINATE("Logic error: start_high > end_high"); - } - // next_populated_iter points to the first entry in the outer map with - // key >= start_high, or end(). - auto next_populated_iter = roarings.lower_bound(start_high); - - // Use uint64_t to avoid an infinite loop when end_high == uint32_max. - roarings_t::iterator start_iter{}; // Definitely assigned in loop. 
- for (uint64_t slot = start_high; slot <= end_high; ++slot) { - roarings_t::iterator slot_iter; - if (next_populated_iter != roarings.end() && - next_populated_iter->first == slot) { - // 'slot' index has caught up to next_populated_iter. - // Note it here and advance next_populated_iter. - slot_iter = next_populated_iter++; - } else { - // 'slot' index has not yet caught up to next_populated_iter. - // Make a fresh entry {key = 'slot', value = Roaring()}, insert - // it just prior to next_populated_iter, and set its copy - // on write flag. We take pains to use emplace_hint and - // piecewise_construct to minimize effort. - slot_iter = roarings.emplace_hint( - next_populated_iter, std::piecewise_construct, - std::forward_as_tuple(uint32_t(slot)), - std::forward_as_tuple()); - auto &bitmap = slot_iter->second; - bitmap.setCopyOnWrite(copyOnWrite); - } - - // Make a note of the iterator of the starting slot. It will be - // needed for the return value. - if (slot == start_high) { - start_iter = slot_iter; - } - } - return start_iter; - } }; /** From 1f95d82042bcdad1d86a04967e8e8d220b6fc809 Mon Sep 17 00:00:00 2001 From: "lgtm-com[bot]" <43144390+lgtm-com[bot]@users.noreply.github.com> Date: Wed, 16 Nov 2022 08:54:48 -0500 Subject: [PATCH 028/162] Add CodeQL workflow for GitHub code scanning (#403) Co-authored-by: LGTM Migrator --- .github/workflows/codeql.yml | 42 ++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .github/workflows/codeql.yml diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 000000000..6d19b4e62 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,42 @@ +name: "CodeQL" + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + schedule: + - cron: "39 2 * * 6" + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + 
matrix: + language: [ cpp, python ] + + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + languages: ${{ matrix.language }} + queries: +security-and-quality + + - name: Autobuild + uses: github/codeql-action/autobuild@v2 + if: ${{ matrix.language == 'cpp' || matrix.language == 'python' }} + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v2 + with: + category: "/language:${{ matrix.language }}" From 827801e2eda53111ecc996af78291a60b80b2099 Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Wed, 16 Nov 2022 09:16:48 -0500 Subject: [PATCH 029/162] Provide a more aggressive Roaring64Map::fastunion() implementation (#405) * Provide a more aggressive Roaring64Map::fastunion() implementation * Initial benchmark implementation * it looks like this benchmark needs to be inside the "NOT WIN32" clause of the CMakeLists.txt * Respond to review feedback * Fix build on github action Ubuntu CI / ubuntu-noexcept-ci.yml --- benchmarks/CMakeLists.txt | 1 + benchmarks/fastunion_benchmark.cpp | 101 +++++++++++++++++++++++ cpp/roaring64map.hh | 123 +++++++++++++++++++++++++++-- tests/cpp_unit.cpp | 40 ++++++++++ tools/cmake/FindCTargets.cmake | 10 +++ 5 files changed, 270 insertions(+), 5 deletions(-) create mode 100644 benchmarks/fastunion_benchmark.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 4a3eb0e8e..71be77ee7 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -13,6 +13,7 @@ if(NOT WIN32) target_link_libraries(add_benchmark m) add_c_benchmark(frozen_benchmark) add_c_benchmark(containsmulti_benchmark) + add_cpp_benchmark(fastunion_benchmark) endif() add_c_benchmark(bitset_container_benchmark) add_c_benchmark(array_container_benchmark) diff --git a/benchmarks/fastunion_benchmark.cpp b/benchmarks/fastunion_benchmark.cpp new file mode 100644 index 000000000..ad693f6b8 --- /dev/null +++ b/benchmarks/fastunion_benchmark.cpp @@ -0,0 
+1,101 @@ +#include +#include +#include +#include "roaring64map.hh" +#include "benchmark.h" + +using roaring::Roaring64Map; + +namespace { +const uint32_t num_iterations = 10; + +const uint32_t num_bitmaps = 100; +const uint32_t num_outer_slots = 1000; +const uint32_t num_inner_values = 2000; + +/** + * Creates the input maps for the test. This method creates 'num_bitmaps' maps, + * each of which contains 'num_outer_slots' 32-bit Roarings, each of which + * contains 'num_inner_values' bits. The inner bits are separated by + * 'num_bitmaps' and their starting offset is offset by 1 from one bitmap to the + * next. The intent is that in the result of the union, all the bits in a given + * 32 bit Roaring slot will end up densely packed together, which seemed like an + * interesting thing to do. + */ +std::vector makeMaps() { + std::vector result; + for (uint32_t bm_index = 0; bm_index != num_bitmaps; ++bm_index) { + Roaring64Map roaring; + + for (uint32_t slot = 0; slot != num_outer_slots; ++slot) { + auto value = (uint64_t(slot) << 32) + bm_index + 0x98765432; + for (uint32_t inner_index = 0; inner_index != num_inner_values; + ++inner_index) { + roaring.add(value); + value += num_bitmaps; + } + } + result.push_back(std::move(roaring)); + } + return result; +} + +Roaring64Map legacy_fastunion(size_t n, const Roaring64Map **inputs) { + Roaring64Map ans; + // not particularly fast + for (size_t lcv = 0; lcv < n; ++lcv) { + ans |= *(inputs[lcv]); + } + return ans; +} + +void benchmarkLegacyFastUnion() { + std::cout << "*** Legacy fastunion ***\n"; + auto maps = makeMaps(); + + // Need pointers to the above + std::vector result_ptrs; + for (auto &map : maps) { + result_ptrs.push_back(&map); + } + + for (uint32_t iter = 0; iter < num_iterations; ++iter) { + uint64_t cycles_start, cycles_final; + RDTSC_START(cycles_start); + auto result = legacy_fastunion(result_ptrs.size(), result_ptrs.data()); + RDTSC_FINAL(cycles_final); + + auto num_cycles = cycles_final - cycles_start; 
+ uint64_t cycles_per_map = num_cycles / maps.size(); + std::cout << "Iteration " << iter << ": " << cycles_per_map << " per map\n"; + } +} + +void benchmarkNewFastUnion() { + std::cout << "*** New fastunion() ***\n"; + auto maps = makeMaps(); + + // Need pointers to the above + std::vector result_ptrs; + for (auto &map : maps) { + result_ptrs.push_back(&map); + } + + for (uint32_t iter = 0; iter < num_iterations; ++iter) { + uint64_t cycles_start, cycles_final; + RDTSC_START(cycles_start); + auto result = + Roaring64Map::fastunion(result_ptrs.size(), result_ptrs.data()); + RDTSC_FINAL(cycles_final); + + auto num_cycles = cycles_final - cycles_start; + uint64_t cycles_per_map = num_cycles / maps.size(); + std::cout << "Iteration " << iter << ": " << cycles_per_map << " per map\n"; + } +} +} // namespace + +int main() { + benchmarkLegacyFastUnion(); + benchmarkNewFastUnion(); +} diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh index aaea82dc3..dcc1e800d 100644 --- a/cpp/roaring64map.hh +++ b/cpp/roaring64map.hh @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1351,12 +1352,124 @@ public: * pointer). */ static Roaring64Map fastunion(size_t n, const Roaring64Map **inputs) { - Roaring64Map ans; - // not particularly fast - for (size_t lcv = 0; lcv < n; ++lcv) { - ans |= *(inputs[lcv]); + // The strategy here is to basically do a "group by" operation. + // We group the input roarings by key, do a 32-bit + // roaring_bitmap_or_many on each group, and collect the results. + // We accomplish the "group by" operation using a priority queue, which + // tracks the next key for each of our input maps. At each step, our + // algorithm takes the next subset of maps that share the same next key, + // runs roaring_bitmap_or_many on those bitmaps, and then advances the + // current_iter on all the affected entries and then repeats. + + // There is an entry in our priority queue for each of the 'n' inputs. 
+ // For a given Roaring64Map, we look at its underlying 'roarings' + // std::map, and take its begin() and end(). This forms our half-open + // interval [current_iter, end_iter), which we keep in the priority + // queue as a pq_entry. These entries are updated (removed and then + // reinserted with the pq_entry.iterator field advanced by one step) as + // our algorithm progresses. But when a given interval becomes empty + // (i.e. pq_entry.iterator == pq_entry.end) it is not returned to the + // priority queue. + struct pq_entry { + roarings_t::const_iterator iterator; + roarings_t::const_iterator end; + }; + + // Custom comparator for the priority queue. + auto pq_comp = [](const pq_entry &lhs, const pq_entry &rhs) { + auto left_key = lhs.iterator->first; + auto right_key = rhs.iterator->first; + + // We compare in the opposite direction than normal because priority + // queues normally order from largest to smallest, but we want + // smallest to largest. + return left_key > right_key; + }; + + // Create and populate the priority queue. + std::priority_queue, decltype(pq_comp)> pq(pq_comp); + for (size_t i = 0; i < n; ++i) { + const auto &roarings = inputs[i]->roarings; + if (roarings.begin() != roarings.end()) { + pq.push({roarings.begin(), roarings.end()}); + } } - return ans; + + // A reusable vector that holds the pointers to the inner bitmaps that + // we pass to the underlying 32-bit fastunion operation. + std::vector group_bitmaps; + + // Summary of the algorithm: + // 1. While the priority queue is not empty: + // A. Get its lowest key. Call this group_key + // B. While the lowest entry in the priority queue has a key equal to + // group_key: + // 1. Remove this entry (the pair {current_iter, end_iter}) from + // the priority queue. + // 2. Add the bitmap pointed to by current_iter to a list of + // 32-bit bitmaps to process. + // 3. Advance current_iter. 
Now it will point to a bitmap entry + // with some key greater than group_key (or it will point to + // end()). + // 4. If current_iter != end_iter, reinsert the pair into the + // priority queue. + // C. Invoke the 32-bit roaring_bitmap_or_many() and add to result + Roaring64Map result; + while (!pq.empty()) { + // Find the next key (the lowest key) in the priority queue. + auto group_key = pq.top().iterator->first; + + // The purpose of the inner loop is to gather all the inner bitmaps + // that share "group_key" into "group_bitmaps" so that they can be + // fed to roaring_bitmap_or_many(). While we are doing this, we + // advance those iterators to their next value and reinsert them + // into the priority queue (unless they reach their end). + group_bitmaps.clear(); + while (!pq.empty()) { + auto candidate_current_iter = pq.top().iterator; + auto candidate_end_iter = pq.top().end; + + auto candidate_key = candidate_current_iter->first; + const auto &candidate_bitmap = candidate_current_iter->second; + + // This element will either be in the group (having + // key == group_key) or it will not be in the group (having + // key > group_key). (Note it cannot have key < group_key + // because of the ordered nature of the priority queue itself + // and the ordered nature of all the underlying roaring maps). + if (candidate_key != group_key) { + // This entry, and (thanks to the nature of the priority + // queue) all other entries as well, are all greater than + // group_key, so we're done collecting elements for the + // current group. Because of the way this loop was written, + // the group will always contain at least one element. + break; + } + + group_bitmaps.push_back(&candidate_bitmap.roaring); + // Remove this entry from the priority queue. Note this + // invalidates pq.top() so make sure you don't have any dangling + // references to it.
+ pq.pop(); + + // Advance 'candidate_current_iter' and insert a new entry + // {candidate_current_iter, candidate_end_iter} into the + // priority queue (unless it has reached its end). + ++candidate_current_iter; + if (candidate_current_iter != candidate_end_iter) { + pq.push({candidate_current_iter, candidate_end_iter}); + } + } + + // Use the fast inner union to combine these. + auto *inner_result = roaring_bitmap_or_many(group_bitmaps.size(), + group_bitmaps.data()); + // Insert the 32-bit result at end of the 'roarings' map of the + // result we are building. + result.roarings.insert(result.roarings.end(), + std::make_pair(group_key, Roaring(inner_result))); + } + return result; } friend class Roaring64MapSetBitForwardIterator; diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp index 108855470..fef448278 100644 --- a/tests/cpp_unit.cpp +++ b/tests/cpp_unit.cpp @@ -1716,6 +1716,45 @@ DEFINE_TEST(test_cpp_is_subset_64) { assert_true(r3.isSubset(r2)); } +DEFINE_TEST(test_cpp_fast_union_64) { + auto update = [](Roaring64Map *dest, uint32_t bitmask, uint32_t offset) { + for (uint32_t i = 0; i != 32; ++i) { + if ((bitmask & (1 << i)) != 0) { + dest->add(offset + i); + } + } + }; + + // Generate three Roaring64Maps that have a variety of combinations of + // present and absent slots and calculate their union with fastunion. 
+ const uint32_t num_slots_to_test = 4; + const uint32_t bitmask_limit = 1 << num_slots_to_test; + + for (size_t r0_bitmask = 0; r0_bitmask != bitmask_limit; ++r0_bitmask) { + for (size_t r1_bitmask = 0; r1_bitmask != bitmask_limit; ++r1_bitmask) { + for (size_t r2_bitmask = 0; r2_bitmask != bitmask_limit; + ++r2_bitmask) { + Roaring64Map r0_map, r1_map, r2_map; + update(&r0_map, r0_bitmask, 0); + update(&r1_map, r1_bitmask, 0x1000); + update(&r2_map, r2_bitmask, 0x2000); + + const Roaring64Map *maps[] = { + &r0_map, &r1_map, &r2_map + }; + auto actual = Roaring64Map::fastunion(3, maps); + + Roaring64Map expected; + update(&expected, r0_bitmask, 0); + update(&expected, r1_bitmask, 0x1000); + update(&expected, r2_bitmask, 0x2000); + + assert_true(expected == actual); + } + } + } +} + DEFINE_TEST(test_cpp_to_string) { // test toString const auto b5 = uint64_t(5) << 32; @@ -1888,6 +1927,7 @@ int main() { cmocka_unit_test(issue_336), cmocka_unit_test(issue_372), cmocka_unit_test(test_cpp_is_subset_64), + cmocka_unit_test(test_cpp_fast_union_64), cmocka_unit_test(test_cpp_to_string), cmocka_unit_test(test_cpp_remove_run_compression), cmocka_unit_test(test_cpp_contains_range_interleaved_containers), diff --git a/tools/cmake/FindCTargets.cmake b/tools/cmake/FindCTargets.cmake index 8dae8ffc3..97f2b64d6 100644 --- a/tools/cmake/FindCTargets.cmake +++ b/tools/cmake/FindCTargets.cmake @@ -41,3 +41,13 @@ function(add_c_benchmark BENCH_NAME) add_executable(${BENCH_NAME} ${BENCH_NAME}.c) target_link_libraries(${BENCH_NAME} ${ROARING_LIB_NAME}) endfunction(add_c_benchmark) + +function(add_cpp_benchmark BENCH_NAME) + add_executable(${BENCH_NAME} ${BENCH_NAME}.cpp) + target_link_libraries(${BENCH_NAME} ${ROARING_LIB_NAME}) + if(ROARING_EXCEPTIONS) + target_compile_definitions(${BENCH_NAME} PUBLIC ROARING_EXCEPTIONS=1) + else() + target_compile_definitions(${BENCH_NAME} PUBLIC ROARING_EXCEPTIONS=0) + endif() +endfunction(add_cpp_benchmark) From 
9672fe539b2152be19d3ce4b6cd9ff3299853da9 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 16 Nov 2022 09:19:01 -0500 Subject: [PATCH 030/162] Minor fix. --- tests/cpp_unit.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp index fef448278..4d4d2dac3 100644 --- a/tests/cpp_unit.cpp +++ b/tests/cpp_unit.cpp @@ -928,10 +928,6 @@ DEFINE_TEST(test_cpp_add_range_closed_combinatoric_64) { const uint32_t num_slots_to_test = 5; const uint32_t base_slot = 50; - // We put std::numeric_limits<>::max in parentheses to avoid a - // clash with the Windows.h header under Windows. - const auto uint32_max = (std::numeric_limits::max)(); - const uint32_t bitmask_limit = 1 << num_slots_to_test; for (uint32_t bitmask = 0; bitmask < bitmask_limit; ++bitmask) { From 4351a6cf8ee7f0b17bd18386bf43c503fc555d51 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 16 Nov 2022 17:19:53 -0500 Subject: [PATCH 031/162] Let us guard the malloc.h include with a check for glibc. (#412) --- include/roaring/portability.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/roaring/portability.h b/include/roaring/portability.h index 3f43e97fe..d16a4daa9 100644 --- a/include/roaring/portability.h +++ b/include/roaring/portability.h @@ -49,7 +49,7 @@ #include #include #include // will provide posix_memalign with _POSIX_C_SOURCE as defined above -#if !(defined(__APPLE__)) && !(defined(__FreeBSD__)) +#ifdef __GLIBC__ #include // this should never be needed but there are some reports that it is needed. #endif From 5355686f8df739ca72ab6044b7cb257ad7e0e09c Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 17 Nov 2022 09:23:04 -0500 Subject: [PATCH 032/162] add support for initializer list (#408) * This PR adds support for initializer lists. * Adding constructor. * Portable printf/uint64_t * Fixing a couple of issues. * Trimming some 'ull'. * Fixed spacing. * A few more fixes. * Removing the new constructors. * Fixed typo. 
* Correcting typo. --- README.md | 5 +++++ cpp/roaring.hh | 11 +++++++++++ cpp/roaring64map.hh | 11 +++++++++++ tests/cpp_random_unit.cpp | 21 +++++++++++---------- tests/cpp_unit.cpp | 30 ++++++++++++++++++++++++++++++ 5 files changed, 68 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 705ddb790..d9689a316 100644 --- a/README.md +++ b/README.md @@ -372,6 +372,11 @@ int main() { r2.printf(); printf("\n"); + // create a new bitmap with initializer list + Roaring r2i = Roaring::bitmapOfList({1, 2, 3, 5, 6}); + + assert(r2i == r2); + // we can also create a bitmap from a pointer to 32-bit integers const uint32_t values[] = {2, 3, 4}; Roaring r3(3, values); diff --git a/cpp/roaring.hh b/cpp/roaring.hh index c193691ba..945a862b6 100644 --- a/cpp/roaring.hh +++ b/cpp/roaring.hh @@ -7,6 +7,7 @@ A C++ header for Roaring Bitmaps. #include #include +#include #include #include #include @@ -112,6 +113,16 @@ public: return ans; } + /** + * Construct a bitmap from a list of uint32_t values. + * E.g., bitmapOfList({1,2,3}). + */ + static Roaring bitmapOfList(std::initializer_list l) { + Roaring ans; + ans.addMany(l.size(), l.begin()); + return ans; + } + /** * Add value x */ diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh index dcc1e800d..e0416ba2c 100644 --- a/cpp/roaring64map.hh +++ b/cpp/roaring64map.hh @@ -15,6 +15,7 @@ #include // for std::printf() in the printf() method #include // for std::memcpy() #include +#include #include #include #include @@ -99,6 +100,16 @@ public: return ans; } + /** + * Construct a bitmap from a list of uint64_t values. + * E.g., bitmapOfList({1,2,3}). + */ + static Roaring64Map bitmapOfList(std::initializer_list l) { + Roaring64Map ans; + ans.addMany(l.size(), l.begin()); + return ans; + } + /** * Adds value x. 
*/ diff --git a/tests/cpp_random_unit.cpp b/tests/cpp_random_unit.cpp index 0cc29c6e1..a9da75ff7 100644 --- a/tests/cpp_random_unit.cpp +++ b/tests/cpp_random_unit.cpp @@ -20,11 +20,12 @@ // https://www.llvm.org/docs/LibFuzzer.html // -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include #include @@ -378,10 +379,10 @@ DEFINE_TEST(random_doublecheck_test_64) { const Roaring64Map &right = roars[rand() % NUM_ROARS]; #ifdef ROARING_CPP_RANDOM_PRINT_STATUS - printf("[%lu]: %llu %llu %llu\n", step, - static_cast(left.cardinality()), - static_cast(right.cardinality()), - static_cast(out.cardinality())); + printf("[%lu]: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", step, + left.cardinality(), + right.cardinality(), + out.cardinality()); #endif int op = rand() % 6; @@ -482,7 +483,7 @@ DEFINE_TEST(random_doublecheck_test_64) { int main() { uint64_t seed = time(nullptr); srand(seed); - printf("Seed: %lu\n", seed); + printf("Seed: %" PRIu64 "\n", seed); gravity = rand() % 10000; // starting focal point diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp index 4d4d2dac3..14948d664 100644 --- a/tests/cpp_unit.cpp +++ b/tests/cpp_unit.cpp @@ -333,6 +333,10 @@ void test_example_cpp(bool copy_on_write) { r2.printf(); printf("\n"); + // create a new bitmap with initializer list + Roaring r2i = Roaring::bitmapOfList({1, 2, 3, 5, 6}); + + assert(r2i == r2); // test select uint32_t element; @@ -532,6 +536,11 @@ void test_example_cpp_64(bool copy_on_write) { r2.printf(); printf("\n"); + // create a new bitmap with initializer list + Roaring64Map r2i = + Roaring64Map::bitmapOfList({1, 2, 234294967296, 195839473298, + 14000000000000000100ull}); + assert(r2i == r2); // test select uint64_t element; @@ -807,6 +816,25 @@ DEFINE_TEST(test_cpp_add_range_closed_64) { assert_true(r1 == r2); } } +DEFINE_TEST(test_bitmap_of_32) { + Roaring r1 = Roaring::bitmapOfList({1,2,4}); + r1.printf(); + printf("\n"); + Roaring r2 = + 
Roaring::bitmapOf(3, 1, 2, 4); + r2.printf(); + printf("\n"); + assert_true(r1 == r2); +} + +DEFINE_TEST(test_bitmap_of_64) { + Roaring64Map r1 = Roaring64Map::bitmapOfList({1,2,4}); + r1.printf(); + Roaring64Map r2 = + Roaring64Map::bitmapOf(3, uint64_t(1), uint64_t(2), uint64_t(4)); + r2.printf(); + assert_true(r1 == r2); +} DEFINE_TEST(test_cpp_add_range_open_64) { { @@ -1867,6 +1895,8 @@ DEFINE_TEST(test_cpp_contains_range_interleaved_containers) { int main() { roaring::misc::tellmeall(); const struct CMUnitTest tests[] = { + cmocka_unit_test(test_bitmap_of_32), + cmocka_unit_test(test_bitmap_of_64), cmocka_unit_test(serial_test), cmocka_unit_test(test_example_true), cmocka_unit_test(test_example_false), From 19d9486f4bd912dd9063e4ae4f297ffa115231e7 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 17 Nov 2022 09:28:10 -0500 Subject: [PATCH 033/162] Preparing new release --- CMakeLists.txt | 8 ++++---- include/roaring/roaring_version.h | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 58ed46dc5..0c779b62a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,10 +17,10 @@ if(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSIO endif() set(ROARING_LIB_NAME roaring) set(PROJECT_VERSION_MAJOR 0) -set(PROJECT_VERSION_MINOR 7) -set(PROJECT_VERSION_PATCH 3) -set(ROARING_LIB_VERSION "0.7.3" CACHE STRING "Roaring library version") -set(ROARING_LIB_SOVERSION "5" CACHE STRING "Roaring library soversion") +set(PROJECT_VERSION_MINOR 8) +set(PROJECT_VERSION_PATCH 0) +set(ROARING_LIB_VERSION "0.8.0" CACHE STRING "Roaring library version") +set(ROARING_LIB_SOVERSION "6" CACHE STRING "Roaring library soversion") option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON) if(NOT ROARING_EXCEPTIONS) diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h index 128727194..ff5a9f5df 100644 --- a/include/roaring/roaring_version.h +++ 
b/include/roaring/roaring_version.h @@ -1,10 +1,10 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "0.7.3" +#define ROARING_VERSION "0.8.0" enum { ROARING_VERSION_MAJOR = 0, - ROARING_VERSION_MINOR = 7, - ROARING_VERSION_REVISION = 3 + ROARING_VERSION_MINOR = 8, + ROARING_VERSION_REVISION = 0 }; #endif // ROARING_INCLUDE_ROARING_VERSION From 6ef51dcd2a555d521caa46459df06015f17cdf6d Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Thu, 17 Nov 2022 14:43:33 -0500 Subject: [PATCH 034/162] convert.c: Remove set-but-not-used variable (Clang 15 warning) (#413) --- src/containers/convert.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/containers/convert.c b/src/containers/convert.c index a87babff0..300a1c0a8 100644 --- a/src/containers/convert.c +++ b/src/containers/convert.c @@ -263,7 +263,6 @@ container_t *convert_run_optimize( int long_ctr = 0; uint64_t cur_word = c_qua_bitset->words[0]; - int run_count = 0; while (true) { while (cur_word == UINT64_C(0) && long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1) @@ -294,7 +293,6 @@ container_t *convert_run_optimize( int local_run_end = __builtin_ctzll(~cur_word_with_1s); run_end = local_run_end + long_ctr * 64; add_run(answer, run_start, run_end - 1); - run_count++; cur_word = cur_word_with_1s & (cur_word_with_1s + 1); } return answer; From 23492ef06a189883b3b2d4d36711ab56526986b4 Mon Sep 17 00:00:00 2001 From: DavidKorczynski Date: Thu, 17 Nov 2022 22:54:46 +0000 Subject: [PATCH 035/162] Add CIFuzz to Github workflows (#414) Signed-off-by: David Korczynski Signed-off-by: David Korczynski --- .github/workflows/cifuzz.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .github/workflows/cifuzz.yml diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml new file mode 100644 index 000000000..94bfd479d --- /dev/null 
+++ b/.github/workflows/cifuzz.yml @@ -0,0 +1,24 @@ +name: CIFuzz +on: [pull_request] +jobs: + Fuzzing: + runs-on: ubuntu-latest + steps: + - name: Build Fuzzers + id: build + uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master + with: + oss-fuzz-project-name: 'croaring' + dry-run: false + - name: Run Fuzzers + uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master + with: + oss-fuzz-project-name: 'croaring' + fuzz-seconds: 300 + dry-run: false + - name: Upload Crash + uses: actions/upload-artifact@v3 + if: failure() && steps.build.outcome == 'success' + with: + name: artifacts + path: ./out/artifacts From a644f44898d3c0f27af4406827bbde5f42a06b53 Mon Sep 17 00:00:00 2001 From: Corey Kosak Date: Tue, 22 Nov 2022 11:34:28 -0500 Subject: [PATCH 036/162] Add initializer_list constructor and assignment operator to Roaring and Roaring64Map (#415) --- cpp/roaring.hh | 22 +++++++++++++++--- cpp/roaring64map.hh | 18 ++++++++++++++- tests/cpp_unit.cpp | 54 ++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 84 insertions(+), 10 deletions(-) diff --git a/cpp/roaring.hh b/cpp/roaring.hh index 945a862b6..f02f12400 100644 --- a/cpp/roaring.hh +++ b/cpp/roaring.hh @@ -56,12 +56,19 @@ public: } /** - * Construct a bitmap from a list of integer values. + * Construct a bitmap from a list of 32-bit integer values. */ Roaring(size_t n, const uint32_t *data) : Roaring() { api::roaring_bitmap_add_many(&roaring, n, data); } + /** + * Construct a bitmap from an initializer list. + */ + Roaring(std::initializer_list l) : Roaring() { + addMany(l.size(), l.begin()); + } + /** * Copy constructor */ @@ -75,8 +82,8 @@ public: } /** - * Move constructor. The moved object remains valid, i.e. - * all methods can still be called on it. + * Move constructor. The moved-from object remains valid but empty, i.e. + * it behaves as though it was just freshly constructed. 
*/ Roaring(Roaring &&r) noexcept : roaring(r.roaring) { // @@ -260,6 +267,15 @@ public: return *this; } + /** + * Assignment from an initializer list. + */ + Roaring &operator=(std::initializer_list l) { + // Delegate to move assignment operator + *this = Roaring(l); + return *this; + } + /** * Compute the intersection between the current bitmap and the provided * bitmap, writing the result in the current bitmap. The provided bitmap diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh index e0416ba2c..cf4b06aae 100644 --- a/cpp/roaring64map.hh +++ b/cpp/roaring64map.hh @@ -53,6 +53,13 @@ public: */ Roaring64Map(size_t n, const uint64_t *data) { addMany(n, data); } + /** + * Construct a bitmap from an initializer list. + */ + Roaring64Map(std::initializer_list l) { + addMany(l.size(), l.begin()); + } + /** * Construct a 64-bit map from a 32-bit one */ @@ -84,7 +91,16 @@ public: /** * Move assignment operator. */ - Roaring64Map &operator=(Roaring64Map &&r) noexcept = default; + Roaring64Map &operator=(Roaring64Map &&r) noexcept = default; + + /** + * Assignment from an initializer list. + */ + Roaring64Map &operator=(std::initializer_list l) { + // Delegate to move assignment operator + *this = Roaring64Map(l); + return *this; + } /** * Construct a bitmap from a list of uint64_t values. 
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp index 14948d664..1ed6c60b2 100644 --- a/tests/cpp_unit.cpp +++ b/tests/cpp_unit.cpp @@ -336,7 +336,12 @@ void test_example_cpp(bool copy_on_write) { // create a new bitmap with initializer list Roaring r2i = Roaring::bitmapOfList({1, 2, 3, 5, 6}); - assert(r2i == r2); + assert_true(r2i == r2); + + // create a new bitmap directly from initializer list + Roaring r2id = {1, 2, 3, 5, 6}; + + assert_true(r2id == r2); // test select uint32_t element; @@ -431,7 +436,8 @@ void test_example_cpp(bool copy_on_write) { assert_true(a.contains(10)); assert_true(a.contains(20)); - // b should be destroyed without any errors + // Our move semantics allow moved-from objects to continue to be used + // normally (they are reset to empty Roarings). assert_true(b.cardinality() == 0); } @@ -448,10 +454,27 @@ void test_example_cpp(bool copy_on_write) { assert_true(a.contains(10)); assert_true(a.contains(20)); - // b should be destroyed without any errors + // Our move semantics allow moved-from objects to continue to be used + // normally (they are reset to empty Roarings). 
assert_int_equal(0, b.cardinality()); } + // test initializer lists + { + Roaring a; + a.add(10); + a.add(20); + + // construction + Roaring b({10, 20}); + assert_true(a == b); + + a.add(30); + // assignment + b = {10, 20, 30}; + assert_true(a == b); + } + // test toString { Roaring a; @@ -540,7 +563,12 @@ void test_example_cpp_64(bool copy_on_write) { Roaring64Map r2i = Roaring64Map::bitmapOfList({1, 2, 234294967296, 195839473298, 14000000000000000100ull}); - assert(r2i == r2); + assert_true(r2i == r2); + + // create a new bitmap directly from initializer list + Roaring64Map r2id = {1, 2, 234294967296, 195839473298, + 14000000000000000100ull}; + assert_true(r2id == r2); // test select uint64_t element; @@ -817,7 +845,7 @@ DEFINE_TEST(test_cpp_add_range_closed_64) { } } DEFINE_TEST(test_bitmap_of_32) { - Roaring r1 = Roaring::bitmapOfList({1,2,4}); + Roaring r1 = Roaring::bitmapOfList({1, 2, 4}); r1.printf(); printf("\n"); Roaring r2 = @@ -825,15 +853,29 @@ DEFINE_TEST(test_bitmap_of_32) { r2.printf(); printf("\n"); assert_true(r1 == r2); + + Roaring r1d = {1, 2, 4}; + assert_true(r1 == r1d); + + Roaring r3a = Roaring::bitmapOfList({7, 8, 9}); + r3a = {1, 2, 4}; // overwrite with assignment operator + assert_true(r1 == r3a); } DEFINE_TEST(test_bitmap_of_64) { - Roaring64Map r1 = Roaring64Map::bitmapOfList({1,2,4}); + Roaring64Map r1 = Roaring64Map::bitmapOfList({1, 2, 4}); r1.printf(); Roaring64Map r2 = Roaring64Map::bitmapOf(3, uint64_t(1), uint64_t(2), uint64_t(4)); r2.printf(); assert_true(r1 == r2); + + Roaring64Map r1d = {1, 2, 4}; + assert_true(r1 == r1d); + + Roaring64Map r3a = Roaring64Map::bitmapOfList({7, 8, 9}); + r3a = {1, 2, 4}; // overwrite with assignment operator + assert_true(r1 == r3a); } DEFINE_TEST(test_cpp_add_range_open_64) { From 7fad89fc785e227f2c72c7f8152f71bbd7bb10ed Mon Sep 17 00:00:00 2001 From: Justin Whear Date: Mon, 12 Dec 2022 15:41:48 -0800 Subject: [PATCH 037/162] Add link to Zig wrapper to README (#418) For the last year and a 
half, I've maintained a Zig wrapper here: https://github.com/jwhear/roaring-zig --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index d9689a316..80c01487b 100644 --- a/README.md +++ b/README.md @@ -648,6 +648,9 @@ Yuce Tekol wrote a D wrapper available at https://github.com/yuce/droaring Antonio Guilherme Ferreira Viggiano wrote a Redis Module available at https://github.com/aviggiano/redis-roaring +# Zig Wrapper + +Justin Whear wrote a Zig wrapper available at https://github.com/jwhear/roaring-zig # Mailing list/discussion group From dd582fd83d9430d836eb526382e45993062dfd3e Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Sun, 25 Dec 2022 13:33:27 -0500 Subject: [PATCH 038/162] Documentation improvement. --- cpp/roaring.hh | 8 ++++++++ cpp/roaring64map.hh | 8 ++++++++ include/roaring/roaring.h | 10 +++++++++- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/cpp/roaring.hh b/cpp/roaring.hh index f02f12400..d48ebdab8 100644 --- a/cpp/roaring.hh +++ b/cpp/roaring.hh @@ -280,6 +280,9 @@ public: * Compute the intersection between the current bitmap and the provided * bitmap, writing the result in the current bitmap. The provided bitmap * is not modified. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmap. */ Roaring &operator&=(const Roaring &r) { api::roaring_bitmap_and_inplace(&roaring, &r.roaring); @@ -612,6 +615,11 @@ public: /** * Computes the intersection between two bitmaps and returns new bitmap. * The current bitmap and the provided bitmap are unchanged. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmap. + * Consider also using the operator &= to avoid needlessly creating + * many temporary bitmaps.
*/ Roaring operator&(const Roaring &o) const { roaring_bitmap_t *r = api::roaring_bitmap_and(&roaring, &o.roaring); diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh index cf4b06aae..1b0b97a53 100644 --- a/cpp/roaring64map.hh +++ b/cpp/roaring64map.hh @@ -492,6 +492,9 @@ public: * Compute the intersection of the current bitmap and the provided bitmap, * writing the result in the current bitmap. The provided bitmap is not * modified. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmap. */ Roaring64Map &operator&=(const Roaring64Map &other) { if (this == &other) { @@ -1304,6 +1307,11 @@ public: /** * Computes the intersection between two bitmaps and returns new bitmap. * The current bitmap and the provided bitmap are unchanged. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmap. + * Consider also using the operator &= to avoid needlessly creating + * many temporary bitmaps. */ Roaring64Map operator&(const Roaring64Map &o) const { return Roaring64Map(*this) &= o; diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h index 415152445..4283d9a3b 100644 --- a/include/roaring/roaring.h +++ b/include/roaring/roaring.h @@ -122,6 +122,11 @@ void roaring_bitmap_printf(const roaring_bitmap_t *r); /** * Computes the intersection between two bitmaps and returns new bitmap. The * caller is responsible for memory management. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmap. + * You may also rely on roaring_bitmap_and_inplace to avoid creating + * many temporary bitmaps. 
*/ roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2); @@ -173,7 +178,10 @@ uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *r1, /** * Inplace version of `roaring_bitmap_and()`, modifies r1 - * r1 == r2 is allowed + * r1 == r2 is allowed. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmap. */ void roaring_bitmap_and_inplace(roaring_bitmap_t *r1, const roaring_bitmap_t *r2); From 025d86759304ffeb8491cda6170f0323c544461b Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 5 Jan 2023 17:25:33 -0500 Subject: [PATCH 039/162] Adding test for PyRoaringBitMap issue81 --- tests/toplevel_unit.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c index 05052ff31..630e5524d 100644 --- a/tests/toplevel_unit.c +++ b/tests/toplevel_unit.c @@ -53,7 +53,8 @@ DEFINE_TEST(range_contains) { uint32_t start = end-2; roaring_bitmap_t *bm = roaring_bitmap_from_range(start, end-1, 1); roaring_bitmap_printf_describe(bm);printf("\n"); - roaring_bitmap_contains_range(bm, start, end); + assert_true(roaring_bitmap_contains_range(bm, start, end-1)); + assert_false(roaring_bitmap_contains_range(bm, start, end)); roaring_bitmap_free(bm); } @@ -1469,7 +1470,7 @@ DEFINE_TEST(test_contains_range) { values[i] = val; } for (uint64_t i = 0; i < 100000; ++i){ - if (roaring_bitmap_contains_range(r1, values[i], values[i] + length_range)){ + if (roaring_bitmap_contains_range(r1, values[i], values[i] + length_range)) { for (uint32_t j = values[i]; j < values[i] + length_range; ++j) assert_true(roaring_bitmap_contains(r1, j)); } else { @@ -1501,6 +1502,14 @@ DEFINE_TEST(test_contains_range) { } } +DEFINE_TEST(test_contains_range_PyRoaringBitMap_issue81) { + roaring_bitmap_t* r = roaring_bitmap_create(); + roaring_bitmap_add_range(r, 1, 1900544); + 
assert_true(roaring_bitmap_contains_range(r,1,1900544)); + assert_false(roaring_bitmap_contains_range(r,1,1900545)); + roaring_bitmap_free(r); +} + DEFINE_TEST(test_intersection_array_x_array) { roaring_bitmap_t *r1 = roaring_bitmap_create(); assert_non_null(r1); @@ -4241,6 +4250,7 @@ int main() { tellmeall(); const struct CMUnitTest tests[] = { + cmocka_unit_test(test_contains_range_PyRoaringBitMap_issue81), cmocka_unit_test(issue316), cmocka_unit_test(issue288), cmocka_unit_test(issue245), From 94c645514b0bc28aee07d429d581d6be0d8d8f37 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 5 Jan 2023 17:33:20 -0500 Subject: [PATCH 040/162] Correcting code. --- tests/toplevel_unit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c index 630e5524d..896cf13b4 100644 --- a/tests/toplevel_unit.c +++ b/tests/toplevel_unit.c @@ -1506,7 +1506,7 @@ DEFINE_TEST(test_contains_range_PyRoaringBitMap_issue81) { roaring_bitmap_t* r = roaring_bitmap_create(); roaring_bitmap_add_range(r, 1, 1900544); assert_true(roaring_bitmap_contains_range(r,1,1900544)); - assert_false(roaring_bitmap_contains_range(r,1,1900545)); + assert_false(roaring_bitmap_contains_range(r,1900543,1900545)); roaring_bitmap_free(r); } From 332ccc740a2af6a1e5c5bf0ec73157688da36cc0 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 5 Jan 2023 22:18:12 -0500 Subject: [PATCH 041/162] Minor maintenance (removing unused functions). 
--- amalgamation.sh | 1 - benchmarks/array_container_benchmark.c | 4 +-- benchmarks/bitset_container_benchmark.c | 2 +- benchmarks/run_container_benchmark.c | 4 +-- include/roaring/containers/array.h | 11 +++---- include/roaring/containers/bitset.h | 26 +++++------------ include/roaring/containers/containers.h | 4 +-- include/roaring/containers/run.h | 12 +++----- include/roaring/isadetection.h | 38 ++++++++++++------------- include/roaring/portability.h | 16 +++++++++-- src/containers/array.c | 1 - src/containers/bitset.c | 4 +-- src/containers/run.c | 1 - tests/bitset_container_unit.c | 6 ++-- tests/run_container_unit.c | 13 +++++++-- 15 files changed, 70 insertions(+), 73 deletions(-) diff --git a/amalgamation.sh b/amalgamation.sh index ed3e54000..8cbfba71b 100755 --- a/amalgamation.sh +++ b/amalgamation.sh @@ -67,7 +67,6 @@ $SCRIPTPATH/include/roaring/containers/mixed_union.h $SCRIPTPATH/include/roaring/containers/mixed_xor.h $SCRIPTPATH/include/roaring/containers/containers.h $SCRIPTPATH/include/roaring/roaring_array.h -$SCRIPTPATH/include/roaring/misc/configreport.h " # .c implementation files diff --git a/benchmarks/array_container_benchmark.c b/benchmarks/array_container_benchmark.c index a76844ac9..903d0e4ac 100644 --- a/benchmarks/array_container_benchmark.c +++ b/benchmarks/array_container_benchmark.c @@ -137,8 +137,8 @@ int main() { printf("intersection cardinality = %d \n", answer); BEST_TIME(intersection_test(B1, B2, BO), answer, repeat, answer); printf("==intersection and union test 2 \n"); - array_container_clear(B1); - array_container_clear(B2); + B1->cardinality = 0; + B2->cardinality = 0; for (int x = 0; x < 1 << 16; x += 16) { array_container_add(B1, (uint16_t)x); } diff --git a/benchmarks/bitset_container_benchmark.c b/benchmarks/bitset_container_benchmark.c index 3d418ee2c..bda9cc9d9 100644 --- a/benchmarks/bitset_container_benchmark.c +++ b/benchmarks/bitset_container_benchmark.c @@ -69,7 +69,7 @@ int set_test(bitset_container_t* B) { int 
unset_test(bitset_container_t* B) { int x; for (x = 0; x < 1 << 16; x += 3) { - bitset_container_unset(B, (uint16_t)x); + bitset_container_remove(B, (uint16_t)x); } return 0; } diff --git a/benchmarks/run_container_benchmark.c b/benchmarks/run_container_benchmark.c index b04170257..9b7256358 100644 --- a/benchmarks/run_container_benchmark.c +++ b/benchmarks/run_container_benchmark.c @@ -141,8 +141,8 @@ int main() { printf("intersection cardinality = %d \n", answer); BEST_TIME(intersection_test(B1, B2, BO), answer, repeat, answer); printf("==intersection and union test 2 \n"); - run_container_clear(B1); - run_container_clear(B2); + B1->n_runs = 0; + B2->n_runs = 0; for (int x = 0; x < (1 << 16); x += 64) { int length = x % 11; for (int y = 0; y < length; ++y) diff --git a/include/roaring/containers/array.h b/include/roaring/containers/array.h index 758703569..3a3fe828a 100644 --- a/include/roaring/containers/array.h +++ b/include/roaring/containers/array.h @@ -86,10 +86,6 @@ void array_container_copy(const array_container_t *src, array_container_t *dst); void array_container_add_from_range(array_container_t *arr, uint32_t min, uint32_t max, uint16_t step); -/* Set the cardinality to zero (does not release memory). */ -static inline void array_container_clear(array_container_t *array) { - array->cardinality = 0; -} static inline bool array_container_empty(const array_container_t *array) { return array->cardinality == 0; @@ -448,14 +444,15 @@ static inline void array_container_add_range_nvals(array_container_t *array, } /** - * Adds all values in range [min,max]. + * Adds all values in range [min,max]. This function is currently unused + * and left as a documentation. 
*/ -static inline void array_container_add_range(array_container_t *array, +/*static inline void array_container_add_range(array_container_t *array, uint32_t min, uint32_t max) { int32_t nvals_greater = count_greater(array->array, array->cardinality, max); int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min); array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater); -} +}*/ /* * Removes all elements array[pos] .. array[pos+count-1] diff --git a/include/roaring/containers/bitset.h b/include/roaring/containers/bitset.h index 2c9e53061..b8c136499 100644 --- a/include/roaring/containers/bitset.h +++ b/include/roaring/containers/bitset.h @@ -77,8 +77,8 @@ static inline void bitset_container_set(bitset_container_t *bitset, bitset->words[offset] = load; } -/* Unset the ith bit. */ -static inline void bitset_container_unset(bitset_container_t *bitset, +/* Unset the ith bit. Currently unused. Could be used for optimization. */ +/*static inline void bitset_container_unset(bitset_container_t *bitset, uint16_t pos) { uint64_t shift = 6; uint64_t offset; @@ -87,7 +87,7 @@ static inline void bitset_container_unset(bitset_container_t *bitset, uint64_t load = bitset->words[offset]; ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality); bitset->words[offset] = load; -} +}*/ /* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower * than bitset_container_set. */ @@ -142,15 +142,15 @@ static inline void bitset_container_set(bitset_container_t *bitset, bitset->words[pos >> 6] = new_word; } -/* Unset the ith bit. */ -static inline void bitset_container_unset(bitset_container_t *bitset, +/* Unset the ith bit. Currently unused. 
*/ +/*static inline void bitset_container_unset(bitset_container_t *bitset, uint16_t pos) { const uint64_t old_word = bitset->words[pos >> 6]; const int index = pos & 63; const uint64_t new_word = old_word & (~(UINT64_C(1) << index)); bitset->cardinality -= (uint32_t)((old_word ^ new_word) >> index); bitset->words[pos >> 6] = new_word; -} +}*/ /* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower * than bitset_container_set. */ @@ -254,19 +254,7 @@ void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min, * bitset->cardinality = bitset_container_compute_cardinality(bitset).*/ int bitset_container_compute_cardinality(const bitset_container_t *bitset); -/* Get whether there is at least one bit set (see bitset_container_empty for the reverse), - when the cardinality is unknown, it is computed and stored in the struct */ -static inline bool bitset_container_nonzero_cardinality( - bitset_container_t *bitset) { - // account for laziness - if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) { - // could bail early instead with a nonzero result - bitset->cardinality = bitset_container_compute_cardinality(bitset); - } - return bitset->cardinality > 0; -} - -/* Check whether this bitset is empty (see bitset_container_nonzero_cardinality for the reverse), +/* Check whether this bitset is empty, * it never modifies the bitset struct. 
*/ static inline bool bitset_container_empty( const bitset_container_t *bitset) { diff --git a/include/roaring/containers/containers.h b/include/roaring/containers/containers.h index ce8f86283..ad78515d1 100644 --- a/include/roaring/containers/containers.h +++ b/include/roaring/containers/containers.h @@ -182,7 +182,7 @@ static inline bitset_container_t *container_to_bitset( * Get the container name from the typecode * (unused at time of writing) */ -static inline const char *get_container_name(uint8_t typecode) { +/*static inline const char *get_container_name(uint8_t typecode) { switch (typecode) { case BITSET_CONTAINER_TYPE: return container_names[0]; @@ -197,7 +197,7 @@ static inline const char *get_container_name(uint8_t typecode) { __builtin_unreachable(); return "unknown"; } -} +}*/ static inline const char *get_full_container_name( const container_t *c, uint8_t typecode diff --git a/include/roaring/containers/run.h b/include/roaring/containers/run.h index 793fc01d8..4b01d5987 100644 --- a/include/roaring/containers/run.h +++ b/include/roaring/containers/run.h @@ -305,11 +305,6 @@ static inline bool run_container_empty( /* Copy one container into another. We assume that they are distinct. */ void run_container_copy(const run_container_t *src, run_container_t *dst); -/* Set the cardinality to zero (does not release memory). */ -static inline void run_container_clear(run_container_t *run) { - run->n_runs = 0; -} - /** * Append run described by vl to the run container, possibly merging. * It is assumed that the run would be inserted at the end of the container, no @@ -610,14 +605,15 @@ static inline void run_container_add_range_nruns(run_container_t* run, } /** - * Add all values in range [min, max] + * Add all values in range [min, max]. This function is currently unused + * and left as documentation. 
*/ -static inline void run_container_add_range(run_container_t* run, +/*static inline void run_container_add_range(run_container_t* run, uint32_t min, uint32_t max) { int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max); int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min); run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater); -} +}*/ /** * Shifts last $count elements either left (distance < 0) or right (distance > 0) diff --git a/include/roaring/isadetection.h b/include/roaring/isadetection.h index 732903756..69c25a073 100644 --- a/include/roaring/isadetection.h +++ b/include/roaring/isadetection.h @@ -76,23 +76,23 @@ enum croaring_instruction_set { #if defined(__PPC64__) -static inline uint32_t dynamic_croaring_detect_supported_architectures() { - return CROARING_ALTIVEC; -} +//static inline uint32_t dynamic_croaring_detect_supported_architectures() { +// return CROARING_ALTIVEC; +//} #elif defined(__arm__) || defined(__aarch64__) // incl. 
armel, armhf, arm64 #if defined(__ARM_NEON) -static inline uint32_t dynamic_croaring_detect_supported_architectures() { - return CROARING_NEON; -} +//static inline uint32_t dynamic_croaring_detect_supported_architectures() { +// return CROARING_NEON; +//} #else // ARM without NEON -static inline uint32_t dynamic_croaring_detect_supported_architectures() { - return CROARING_DEFAULT; -} +//static inline uint32_t dynamic_croaring_detect_supported_architectures() { +// return CROARING_DEFAULT; +//} #endif @@ -165,9 +165,9 @@ static inline uint32_t dynamic_croaring_detect_supported_architectures() { #else // fallback -static inline uint32_t dynamic_croaring_detect_supported_architectures() { - return CROARING_DEFAULT; -} +//static inline uint32_t dynamic_croaring_detect_supported_architectures() { +// return CROARING_DEFAULT; +//} #endif // end SIMD extension detection code @@ -220,14 +220,14 @@ static inline bool croaring_avx2() { #else // defined(__x86_64__) || defined(_M_AMD64) // x64 -static inline bool croaring_avx2() { - return false; -} +//static inline bool croaring_avx2() { +// return false; +//} -static inline uint32_t croaring_detect_supported_architectures() { - // no runtime dispatch - return dynamic_croaring_detect_supported_architectures(); -} +//static inline uint32_t croaring_detect_supported_architectures() { +// // no runtime dispatch +// return dynamic_croaring_detect_supported_architectures(); +//} #endif // defined(__x86_64__) || defined(_M_AMD64) // x64 #endif // ROARING_ISADETECTION_H diff --git a/include/roaring/portability.h b/include/roaring/portability.h index d16a4daa9..5b0a942ee 100644 --- a/include/roaring/portability.h +++ b/include/roaring/portability.h @@ -39,7 +39,11 @@ #endif // __clang__ #endif // _MSC_VER -#if !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L) +#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L) +#undef _POSIX_C_SOURCE +#endif + +#ifndef _POSIX_C_SOURCE #define _POSIX_C_SOURCE 200809L #endif // 
!(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L) #if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) @@ -226,6 +230,10 @@ inline int __builtin_clzll(unsigned long long input_num) { #define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100) +#ifdef USENEON +// we can always compute the popcount fast. +#elif (defined(_M_ARM) || defined(_M_ARM64)) && (defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO) +// we will need this function: static inline int hammingbackup(uint64_t x) { uint64_t c1 = UINT64_C(0x5555555555555555); uint64_t c2 = UINT64_C(0x3333333333333333); @@ -235,10 +243,14 @@ static inline int hammingbackup(uint64_t x) { x *= UINT64_C(0x0101010101010101); return x >> 56; } +#endif + static inline int hamming(uint64_t x) { #if defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO -#ifdef _M_ARM64 +#ifdef USENEON + return vaddv_u8(vcnt_u8(vcreate_u8(x))); +#elif defined(_M_ARM64) return hammingbackup(x); // (int) _CountOneBits64(x); is unavailable #else // _M_ARM64 diff --git a/src/containers/array.c b/src/containers/array.c index dd9632062..b4adc2de9 100644 --- a/src/containers/array.c +++ b/src/containers/array.c @@ -23,7 +23,6 @@ extern inline bool array_container_contains(const array_container_t *arr, uint16_t pos); extern inline int array_container_cardinality(const array_container_t *array); extern inline bool array_container_nonzero_cardinality(const array_container_t *array); -extern inline void array_container_clear(array_container_t *array); extern inline int32_t array_container_serialized_size_in_bytes(int32_t card); extern inline bool array_container_empty(const array_container_t *array); extern inline bool array_container_full(const array_container_t *array); diff --git a/src/containers/bitset.c b/src/containers/bitset.c index c03d32a9f..31421846a 100644 --- a/src/containers/bitset.c +++ b/src/containers/bitset.c @@ -21,9 +21,9 @@ extern "C" { 
namespace roaring { namespace internal { #endif extern inline int bitset_container_cardinality(const bitset_container_t *bitset); -extern inline bool bitset_container_nonzero_cardinality(bitset_container_t *bitset); extern inline void bitset_container_set(bitset_container_t *bitset, uint16_t pos); -extern inline void bitset_container_unset(bitset_container_t *bitset, uint16_t pos); +// unused at this time: +//extern inline void bitset_container_unset(bitset_container_t *bitset, uint16_t pos); extern inline bool bitset_container_get(const bitset_container_t *bitset, uint16_t pos); extern inline int32_t bitset_container_serialized_size_in_bytes(void); diff --git a/src/containers/run.c b/src/containers/run.c index 6c14eef6a..ee8a4bcf3 100644 --- a/src/containers/run.c +++ b/src/containers/run.c @@ -18,7 +18,6 @@ extern inline bool run_container_contains(const run_container_t *run, extern inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x); extern inline bool run_container_is_full(const run_container_t *run); extern inline bool run_container_nonzero_cardinality(const run_container_t *rc); -extern inline void run_container_clear(run_container_t *run); extern inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs); extern inline run_container_t *run_container_create_range(uint32_t start, uint32_t stop); diff --git a/tests/bitset_container_unit.c b/tests/bitset_container_unit.c index 7de6f6826..64d752d5b 100644 --- a/tests/bitset_container_unit.c +++ b/tests/bitset_container_unit.c @@ -79,7 +79,7 @@ DEFINE_TEST(set_get_test) { (1 << 16) / 3 + 1); for (size_t x = 0; x < 1 << 16; x += 3) { - bitset_container_unset(B, x); + bitset_container_remove(B, x); } assert_int_equal(bitset_container_cardinality(B), 0); @@ -154,7 +154,7 @@ DEFINE_TEST(xor_test) { } for (size_t x = 0; x < (1 << 16); x += 62 * 3) { - bitset_container_unset(BI, x); + bitset_container_remove(BI, x); } bitset_container_xor(B1, B2, TMP); @@ -185,7 +185,7 @@ 
DEFINE_TEST(andnot_test) { // important: 62 is not divisible by 3 for (size_t x = 0; x < (1 << 16); x += 62) { bitset_container_set(B2, x); - bitset_container_unset(BI, x); + bitset_container_remove(BI, x); } const int expected = bitset_container_compute_cardinality(BI); diff --git a/tests/run_container_unit.c b/tests/run_container_unit.c index dbf08f253..94adf6d88 100644 --- a/tests/run_container_unit.c +++ b/tests/run_container_unit.c @@ -171,11 +171,18 @@ DEFINE_TEST(select_test) { run_container_free(B); } +static inline void _run_container_add_range(run_container_t* run, + uint32_t min, uint32_t max) { + int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max); + int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min); + run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater); +} + DEFINE_TEST(remove_range_test) { run_container_t* run = run_container_create(); - run_container_add_range(run, 100, 150); - run_container_add_range(run, 200, 250); - run_container_add_range(run, 300, 350); + _run_container_add_range(run, 100, 150); + _run_container_add_range(run, 200, 250); + _run_container_add_range(run, 300, 350); // act on left-most run run_container_remove_range(run, 100, 110); From 6fee997d9ed94dab3f75ca78d3fbade22415a240 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 6 Jan 2023 03:25:59 +0000 Subject: [PATCH 042/162] Minor fixes --- tests/cpp_random_unit.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cpp_random_unit.cpp b/tests/cpp_random_unit.cpp index a9da75ff7..fc49c3239 100644 --- a/tests/cpp_random_unit.cpp +++ b/tests/cpp_random_unit.cpp @@ -131,7 +131,7 @@ Roaring64Map make_random_bitset64() { uint64_t card = r.cardinality(); if (card != 0) { uint64_t rnk = rand() % card; - uint64_t element; + uint64_t element = 0; assert_true(r.select(rnk, &element)); assert_int_equal(rnk + 1, r.rank(element)); r.remove(rnk); @@ -424,7 +424,7 @@ 
DEFINE_TEST(random_doublecheck_test_64) { uint64_t card = out.cardinality(); if (card != 0) { // pick gravity point inside set somewhere uint64_t rnk = rand() % card; - uint64_t element; + uint64_t element = 0; assert_true(out.select(rnk, &element)); assert_int_equal(rnk + 1, out.rank(element)); gravity64 = element; From 4fc13f1e54f0ae8e0bcd6bde5225c2bb3de61111 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 5 Jan 2023 22:26:17 -0500 Subject: [PATCH 043/162] Minor fix --- include/roaring/misc/configreport.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/include/roaring/misc/configreport.h b/include/roaring/misc/configreport.h index 7e3c3c1d6..05b214505 100644 --- a/include/roaring/misc/configreport.h +++ b/include/roaring/misc/configreport.h @@ -177,14 +177,6 @@ static inline void tellmeall() { #ifdef __VERSION__ printf(" compiler version: %s\t", __VERSION__); #endif - uint32_t config = croaring_detect_supported_architectures(); - if((config & CROARING_NEON) == CROARING_NEON) { - printf(" NEON detected\t"); - } - if((config & CROARING_ALTIVEC) == CROARING_ALTIVEC) { - printf("Altivec detected\n"); - } - if ((sizeof(int) != 4) || (sizeof(long) != 8)) { printf("number of bytes: int = %lu long = %lu \n", (long unsigned int)sizeof(size_t), From 79efb65e364102963f63354b440620a741648fd5 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 5 Jan 2023 22:39:05 -0500 Subject: [PATCH 044/162] Please don't rely on asserts in tests !!! 
--- tests/add_offset.c | 16 +++---- tests/c_example1.c | 21 +++++---- tests/cpp_example2.cpp | 11 +++-- tests/mixed_container_unit.c | 4 +- tests/realdata_unit.c | 41 +++++++++-------- tests/roaring64map_checked.hh | 50 ++++++++++---------- tests/roaring_checked.hh | 86 +++++++++++++++++------------------ 7 files changed, 117 insertions(+), 112 deletions(-) diff --git a/tests/add_offset.c b/tests/add_offset.c index 211b3ae50..edd515d97 100644 --- a/tests/add_offset.c +++ b/tests/add_offset.c @@ -41,7 +41,7 @@ static int setup_container_add_offset_test(void **state_) { switch (test.type) { case BITSET_CONTAINER_TYPE: bc = bitset_container_create(); - assert(bc != NULL); + assert_true(bc != NULL); for (size_t i = 0; i < test.n_values; i++) { bitset_container_add(bc, test.values[i]); } @@ -49,7 +49,7 @@ static int setup_container_add_offset_test(void **state_) { break; case ARRAY_CONTAINER_TYPE: ac = array_container_create(); - assert(ac != NULL); + assert_true(ac != NULL); for (size_t i = 0; i < test.n_values; i++) { array_container_add(ac, test.values[i]); } @@ -57,14 +57,14 @@ static int setup_container_add_offset_test(void **state_) { break; case RUN_CONTAINER_TYPE: rc = run_container_create(); - assert(rc != NULL); + assert_true(rc != NULL); for (size_t i = 0; i < test.n_values; i++) { run_container_add(rc, test.values[i]); } state->in = rc; break; default: - assert(false); // To catch buggy tests. + assert_true(false); // To catch buggy tests. 
} return 0; @@ -104,7 +104,7 @@ static void container_add_offset_test(void **state_) { uint8_t type = test.type; int card_lo = 0, card_hi = 0; - assert(test.n_values > 0); + assert_true(test.n_values > 0); container_add_offset(state->in, type, &state->lo, &state->hi, offset); container_add_offset(state->in, type, NULL, &state->hi_only, offset); @@ -158,7 +158,7 @@ static int setup_roaring_add_offset_test(void **state_) { roaring_add_offset_test_case_t test = state->test_case; state->in = roaring_bitmap_of_ptr(test.n_values, test.values); - assert(state->in != NULL); + assert_true(state->in != NULL); return 0; } @@ -314,7 +314,7 @@ int main() { dense_bitmap[i++] = 400000; dense_bitmap[i++] = 1400000; - assert(i == ARRAY_SIZE(dense_bitmap)); + assert_true(i == ARRAY_SIZE(dense_bitmap)); // NB: only add positive offsets, the test function takes care of also // running a negative test for that offset. @@ -338,7 +338,7 @@ int main() { roaring_add_offset_test_state_t state = ROARING_ADD_OFFSET_TEST_CASE(dense_bitmap, offset); roaring_state[i++] = state; } - assert(i <= ARRAY_SIZE(roaring_state)); + assert_true(i <= ARRAY_SIZE(roaring_state)); i = j = 0; struct CMUnitTest tests[ARRAY_SIZE(container_state)+ARRAY_SIZE(roaring_state)]; diff --git a/tests/c_example1.c b/tests/c_example1.c index 833eeb9ce..a0d3480af 100644 --- a/tests/c_example1.c +++ b/tests/c_example1.c @@ -2,6 +2,7 @@ #include #include #include +#include "test.h" bool roaring_iterator_sumall(uint32_t value, void *param) { *(uint32_t *)param += value; @@ -14,7 +15,7 @@ int main() { // then we can add values for (uint32_t i = 100; i < 1000; i++) roaring_bitmap_add(r1, i); // check whether a value is contained - assert(roaring_bitmap_contains(r1, 500)); + assert_true(roaring_bitmap_contains(r1, 500)); // compute how many bits there are: uint32_t cardinality = roaring_bitmap_get_cardinality(r1); printf("Cardinality = %d \n", cardinality); @@ -38,24 +39,24 @@ int main() { // we can also go in reverse and go from 
arrays to bitmaps uint64_t card1 = roaring_bitmap_get_cardinality(r1); uint32_t *arr1 = (uint32_t *)malloc(card1 * sizeof(uint32_t)); - assert(arr1 != NULL); + assert_true(arr1 != NULL); roaring_bitmap_to_uint32_array(r1, arr1); roaring_bitmap_t *r1f = roaring_bitmap_of_ptr(card1, arr1); free(arr1); - assert(roaring_bitmap_equals(r1, r1f)); // what we recover is equal + assert_true(roaring_bitmap_equals(r1, r1f)); // what we recover is equal roaring_bitmap_free(r1f); // we can go from arrays to bitmaps from "offset" by "limit" size_t offset = 100; size_t limit = 1000; uint32_t *arr3 = (uint32_t *)malloc(limit * sizeof(uint32_t)); - assert(arr3 != NULL); + assert_true(arr3 != NULL); roaring_bitmap_range_uint32_array(r1, offset, limit, arr3); free(arr3); // we can copy and compare bitmaps roaring_bitmap_t *z = roaring_bitmap_copy(r3); - assert(roaring_bitmap_equals(r3, z)); // what we recover is equal + assert_true(roaring_bitmap_equals(r3, z)); // what we recover is equal roaring_bitmap_free(z); // we can compute union two-by-two @@ -65,12 +66,12 @@ int main() { // we can compute a big union const roaring_bitmap_t *allmybitmaps[] = {r1, r2, r3}; roaring_bitmap_t *bigunion = roaring_bitmap_or_many(3, allmybitmaps); - assert( + assert_true( roaring_bitmap_equals(r1_2_3, bigunion)); // what we recover is equal // can also do the big union with a heap roaring_bitmap_t *bigunionheap = roaring_bitmap_or_many_heap(3, allmybitmaps); - assert(roaring_bitmap_equals(r1_2_3, bigunionheap)); + assert_true(roaring_bitmap_equals(r1_2_3, bigunionheap)); roaring_bitmap_free(r1_2_3); roaring_bitmap_free(bigunion); @@ -85,18 +86,18 @@ int main() { char *serializedbytes = (char*)malloc(expectedsize); roaring_bitmap_portable_serialize(r1, serializedbytes); roaring_bitmap_t *t = roaring_bitmap_portable_deserialize(serializedbytes); - assert(roaring_bitmap_equals(r1, t)); // what we recover is equal + assert_true(roaring_bitmap_equals(r1, t)); // what we recover is equal 
roaring_bitmap_free(t); // we can also check whether there is a bitmap at a memory location without // reading it size_t sizeofbitmap = roaring_bitmap_portable_deserialize_size(serializedbytes, expectedsize); printf("sizeofbitmap = %zu \n", sizeofbitmap); - assert(sizeofbitmap == + assert_true(sizeofbitmap == expectedsize); // sizeofbitmap would be zero if no bitmap were found // we can also read the bitmap "safely" by specifying a byte size limit: t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize); - assert(roaring_bitmap_equals(r1, t)); // what we recover is equal + assert_true(roaring_bitmap_equals(r1, t)); // what we recover is equal roaring_bitmap_free(t); free(serializedbytes); diff --git a/tests/cpp_example2.cpp b/tests/cpp_example2.cpp index 9005712d0..bea031345 100644 --- a/tests/cpp_example2.cpp +++ b/tests/cpp_example2.cpp @@ -1,6 +1,7 @@ #include #include "roaring.hh" +#include "test.h" using namespace roaring; @@ -11,7 +12,7 @@ int main() { } // check whether a value is contained - assert(r1.contains(500)); + assert_true(r1.contains(500)); // compute how many bits there are: uint32_t cardinality = r1.cardinality(); @@ -47,11 +48,11 @@ int main() { delete[] arr1; // bitmaps shall be equal - assert(r1 == r1f); + assert_true(r1 == r1f); // we can copy and compare bitmaps Roaring z(r3); - assert(r3 == z); + assert_true(r3 == z); // we can compute union two-by-two Roaring r1_2_3 = r1 | r2; @@ -60,7 +61,7 @@ int main() { // we can compute a big union const Roaring *allmybitmaps[] = {&r1, &r2, &r3}; Roaring bigunion = Roaring::fastunion(3, allmybitmaps); - assert(r1_2_3 == bigunion); + assert_true(r1_2_3 == bigunion); // we can compute intersection two-by-two Roaring i1_2 = r1 & r2; @@ -70,7 +71,7 @@ int main() { char *serializedbytes = new char[expectedsize]; r1.write(serializedbytes); Roaring t = Roaring::read(serializedbytes); - assert(r1 == t); + assert_true(r1 == t); delete[] serializedbytes; // we can iterate over all values 
using custom functions diff --git a/tests/mixed_container_unit.c b/tests/mixed_container_unit.c index c55a59dc3..72964eba7 100644 --- a/tests/mixed_container_unit.c +++ b/tests/mixed_container_unit.c @@ -1588,7 +1588,7 @@ static int run_negation_range_tests(int k, int h, int start_offset, int r_start, int result_size_should_be; bool result_should_be[1 << 16]; - assert(h < k); // bad test call otherwise..not failure of code under test + assert_true(h < k); // bad test call otherwise..not failure of code under test int runlen = h; for (int x = 0; x < (1 << 16) - start_offset; x++) { @@ -1669,7 +1669,7 @@ static int run_negation_range_tests_simpler(int k, int h, int start_offset, int result_size_should_be; bool result_should_be[1 << 16]; - assert(h < k); + assert_true(h < k); int runlen = h; for (int x = 0; x < (1 << 16) - start_offset; x++) { diff --git a/tests/realdata_unit.c b/tests/realdata_unit.c index 5603f5206..1d37a3d18 100644 --- a/tests/realdata_unit.c +++ b/tests/realdata_unit.c @@ -16,6 +16,7 @@ #include "../benchmarks/numbersfromtextfiles.h" #include "config.h" +#include "test.h" /** * Once you have collected all the integers, build the bitmaps. @@ -645,17 +646,17 @@ bool compare_wide_unions(roaring_bitmap_t **rnorun, roaring_bitmap_t **rruns, printf("[compare_wide_unions] Unions don't agree! 
(fast run-norun) \n"); return false; } - assert(roaring_bitmap_equals(tempornorun, temporruns)); + assert_true(roaring_bitmap_equals(tempornorun, temporruns)); roaring_bitmap_t *tempornorunheap = roaring_bitmap_or_many_heap(count, (const roaring_bitmap_t **)rnorun); roaring_bitmap_t *temporrunsheap = roaring_bitmap_or_many_heap(count, (const roaring_bitmap_t **)rruns); - // assert(slow_bitmap_equals(tempornorun, tempornorunheap)); - // assert(slow_bitmap_equals(temporruns,temporrunsheap)); + // assert_true(slow_bitmap_equals(tempornorun, tempornorunheap)); + // assert_true(slow_bitmap_equals(temporruns,temporrunsheap)); - assert(roaring_bitmap_equals(tempornorun, tempornorunheap)); - assert(roaring_bitmap_equals(temporruns, temporrunsheap)); + assert_true(roaring_bitmap_equals(tempornorun, tempornorunheap)); + assert_true(roaring_bitmap_equals(temporruns, temporrunsheap)); roaring_bitmap_free(tempornorunheap); roaring_bitmap_free(temporrunsheap); @@ -665,24 +666,24 @@ bool compare_wide_unions(roaring_bitmap_t **rnorun, roaring_bitmap_t **rruns, longtempornorun = rnorun[0]; longtemporruns = rruns[0]; } else { - assert(roaring_bitmap_equals(rnorun[0], rruns[0])); - assert(roaring_bitmap_equals(rnorun[1], rruns[1])); + assert_true(roaring_bitmap_equals(rnorun[0], rruns[0])); + assert_true(roaring_bitmap_equals(rnorun[1], rruns[1])); longtempornorun = roaring_bitmap_or(rnorun[0], rnorun[1]); longtemporruns = roaring_bitmap_or(rruns[0], rruns[1]); - assert(roaring_bitmap_equals(longtempornorun, longtemporruns)); + assert_true(roaring_bitmap_equals(longtempornorun, longtemporruns)); for (int i = 2; i < (int)count; ++i) { - assert(roaring_bitmap_equals(rnorun[i], rruns[i])); - assert(roaring_bitmap_equals(longtempornorun, longtemporruns)); + assert_true(roaring_bitmap_equals(rnorun[i], rruns[i])); + assert_true(roaring_bitmap_equals(longtempornorun, longtemporruns)); roaring_bitmap_t *t1 = roaring_bitmap_or(rnorun[i], longtempornorun); roaring_bitmap_t *t2 = 
roaring_bitmap_or(rruns[i], longtemporruns); - assert(roaring_bitmap_equals(t1, t2)); + assert_true(roaring_bitmap_equals(t1, t2)); roaring_bitmap_free(longtempornorun); longtempornorun = t1; roaring_bitmap_free(longtemporruns); longtemporruns = t2; - assert(roaring_bitmap_equals(longtempornorun, longtemporruns)); + assert_true(roaring_bitmap_equals(longtempornorun, longtemporruns)); } } if (!slow_bitmap_equals(longtempornorun, tempornorun)) { @@ -712,7 +713,7 @@ bool compare_wide_xors(roaring_bitmap_t **rnorun, roaring_bitmap_t **rruns, printf("[compare_wide_xors] Xors don't agree! (fast run-norun) \n"); return false; } - assert(roaring_bitmap_equals(tempornorun, temporruns)); + assert_true(roaring_bitmap_equals(tempornorun, temporruns)); roaring_bitmap_t *longtempornorun; roaring_bitmap_t *longtemporruns; @@ -720,24 +721,24 @@ bool compare_wide_xors(roaring_bitmap_t **rnorun, roaring_bitmap_t **rruns, longtempornorun = rnorun[0]; longtemporruns = rruns[0]; } else { - assert(roaring_bitmap_equals(rnorun[0], rruns[0])); - assert(roaring_bitmap_equals(rnorun[1], rruns[1])); + assert_true(roaring_bitmap_equals(rnorun[0], rruns[0])); + assert_true(roaring_bitmap_equals(rnorun[1], rruns[1])); longtempornorun = roaring_bitmap_xor(rnorun[0], rnorun[1]); longtemporruns = roaring_bitmap_xor(rruns[0], rruns[1]); - assert(roaring_bitmap_equals(longtempornorun, longtemporruns)); + assert_true(roaring_bitmap_equals(longtempornorun, longtemporruns)); for (int i = 2; i < (int)count; ++i) { - assert(roaring_bitmap_equals(rnorun[i], rruns[i])); - assert(roaring_bitmap_equals(longtempornorun, longtemporruns)); + assert_true(roaring_bitmap_equals(rnorun[i], rruns[i])); + assert_true(roaring_bitmap_equals(longtempornorun, longtemporruns)); roaring_bitmap_t *t1 = roaring_bitmap_xor(rnorun[i], longtempornorun); roaring_bitmap_t *t2 = roaring_bitmap_xor(rruns[i], longtemporruns); - assert(roaring_bitmap_equals(t1, t2)); + assert_true(roaring_bitmap_equals(t1, t2)); 
roaring_bitmap_free(longtempornorun); longtempornorun = t1; roaring_bitmap_free(longtemporruns); longtemporruns = t2; - assert(roaring_bitmap_equals(longtempornorun, longtemporruns)); + assert_true(roaring_bitmap_equals(longtempornorun, longtemporruns)); } } if (!slow_bitmap_equals(longtempornorun, tempornorun)) { diff --git a/tests/roaring64map_checked.hh b/tests/roaring64map_checked.hh index d31276fa9..d195b205a 100644 --- a/tests/roaring64map_checked.hh +++ b/tests/roaring64map_checked.hh @@ -35,7 +35,9 @@ #include #include // sorted set, typically a red-black tree implementation -#include + +#include "test.h" + #define ROARING_CPP_NAMESPACE unchecked // can't be overridden if global #include "roaring64map.hh" // contains Roaring64Map unchecked class @@ -104,14 +106,14 @@ class Roaring64Map { bool addChecked(uint32_t x) { bool ans = plain.addChecked(x); bool was_in_set = check.insert(x).second; // insert -> pair - assert(ans == was_in_set); + assert_true(ans == was_in_set); (void)was_in_set; // unused besides assert return ans; } bool addChecked(uint64_t x) { bool ans = plain.addChecked(x); bool was_in_set = check.insert(x).second; // insert -> pair - assert(ans == was_in_set); + assert_true(ans == was_in_set); (void)was_in_set; // unused besides assert return ans; } @@ -161,14 +163,14 @@ class Roaring64Map { bool removeChecked(uint32_t x) { bool ans = plain.removeChecked(x); size_t num_removed = check.erase(x); - assert(ans == (num_removed == 1)); + assert_true(ans == (num_removed == 1)); (void)num_removed; // unused besides assert return ans; } bool removeChecked(uint64_t x) { bool ans = plain.removeChecked(x); size_t num_removed = check.erase(x); - assert(ans == (num_removed == 1)); + assert_true(ans == (num_removed == 1)); (void)num_removed; // unused besides assert return ans; } @@ -211,13 +213,13 @@ class Roaring64Map { uint64_t maximum() const { uint64_t ans = plain.maximum(); - assert(check.empty() ? 
ans == 0 : ans == *check.rbegin()); + assert_true(check.empty() ? ans == 0 : ans == *check.rbegin()); return ans; } uint64_t minimum() const { uint64_t ans = plain.minimum(); - assert(check.empty() + assert_true(check.empty() ? ans == (std::numeric_limits::max)() : ans == *check.begin()); return ans; @@ -225,12 +227,12 @@ class Roaring64Map { bool contains(uint32_t x) const { bool ans = plain.contains(x); - assert(ans == (check.find(x) != check.end())); + assert_true(ans == (check.find(x) != check.end())); return ans; } bool contains(uint64_t x) const { bool ans = plain.contains(x); - assert(ans == (check.find(x) != check.end())); + assert_true(ans == (check.find(x) != check.end())); return ans; } @@ -253,7 +255,7 @@ class Roaring64Map { } ~Roaring64Map() { - assert(does_std_set_match_roaring()); // always check on destructor + assert_true(does_std_set_match_roaring()); // always check on destructor } Roaring64Map &operator=(const Roaring64Map &r) { @@ -331,19 +333,19 @@ class Roaring64Map { uint64_t cardinality() const { uint64_t ans = plain.cardinality(); - assert(ans == check.size()); + assert_true(ans == check.size()); return ans; } bool isEmpty() const { bool ans = plain.isEmpty(); - assert(ans == check.empty()); + assert_true(ans == check.empty()); return ans; } bool isSubset(const Roaring64Map &r) const { // is `this` subset of `r`? bool ans = plain.isSubset(r.plain); - assert(ans == std::includes( + assert_true(ans == std::includes( r.check.begin(), r.check.end(), // containing range check.begin(), check.end() // range to test for containment )); @@ -352,7 +354,7 @@ class Roaring64Map { bool isStrictSubset(const Roaring64Map &r) const { // is `this` subset of `r`? 
bool ans = plain.isStrictSubset(r.plain); - assert(ans == (std::includes( + assert_true(ans == (std::includes( r.check.begin(), r.check.end(), // containing range check.begin(), check.end() // range to test for containment ) && r.check.size() > check.size())); @@ -366,7 +368,7 @@ class Roaring64Map { bool operator==(const Roaring64Map &r) const { bool ans = (plain == r.plain); - assert(ans == (check == r.check)); + assert_true(ans == (check == r.check)); return ans; } @@ -399,7 +401,7 @@ class Roaring64Map { void iterate(roaring::api::roaring_iterator64 iterator, void *ptr) const { plain.iterate(iterator, ptr); - assert(does_std_set_match_roaring()); // checks equivalent iteration + assert_true(does_std_set_match_roaring()); // checks equivalent iteration } bool select(uint64_t rnk, uint64_t *element) const { @@ -409,7 +411,7 @@ class Roaring64Map { auto it_end = check.end(); for (uint64_t i = 0; it != it_end && i < rnk; ++i) ++it; - assert(ans == (it != it_end) && (ans ? *it == *element : true)); + assert_true(ans == (it != it_end) && (ans ? 
*it == *element : true)); return ans; } @@ -422,7 +424,7 @@ class Roaring64Map { auto it_end = check.end(); for (; it != it_end && *it <= x; ++it) ++count; - assert(ans == count); + assert_true(ans == count); return ans; } @@ -449,7 +451,7 @@ class Roaring64Map { Roaring64Map ans(plain & o.plain); Roaring64Map inplace(*this); - assert(ans == (inplace &= o)); // validate against in-place version + assert_true(ans == (inplace &= o)); // validate against in-place version return ans; } @@ -458,7 +460,7 @@ class Roaring64Map { Roaring64Map ans(plain - o.plain); Roaring64Map inplace(*this); - assert(ans == (inplace -= o)); // validate against in-place version + assert_true(ans == (inplace -= o)); // validate against in-place version return ans; } @@ -467,7 +469,7 @@ class Roaring64Map { Roaring64Map ans(plain | o.plain); Roaring64Map inplace(*this); - assert(ans == (inplace |= o)); // validate against in-place version + assert_true(ans == (inplace |= o)); // validate against in-place version return ans; } @@ -476,7 +478,7 @@ class Roaring64Map { Roaring64Map ans(plain ^ o.plain); Roaring64Map inplace(*this); - assert(ans == (inplace ^= o)); // validate against in-place version + assert_true(ans == (inplace ^= o)); // validate against in-place version return ans; } @@ -505,12 +507,12 @@ class Roaring64Map { delete[] plain_inputs; if (n == 0) - assert(ans.cardinality() == 0); + assert_true(ans.cardinality() == 0); else { Roaring64Map temp = *inputs[0]; for (size_t i = 1; i < n; ++i) temp |= *inputs[i]; - assert(temp == ans); + assert_true(temp == ans); } return ans; diff --git a/tests/roaring_checked.hh b/tests/roaring_checked.hh index b06320199..2eb7a2e5c 100644 --- a/tests/roaring_checked.hh +++ b/tests/roaring_checked.hh @@ -42,7 +42,7 @@ #include #include // sorted set, typically a red-black tree implementation -#include +#include "test.h" #define ROARING_CPP_NAMESPACE unchecked // can't be overridden if global #include "roaring.hh" // contains Roaring unchecked class 
@@ -112,7 +112,7 @@ class Roaring { bool addChecked(uint32_t x) { bool ans = plain.addChecked(x); bool was_in_set = check.insert(x).second; // insert -> pair - assert(ans == was_in_set); + assert_true(ans == was_in_set); (void)was_in_set; // unused besides assert return ans; } @@ -145,7 +145,7 @@ class Roaring { bool removeChecked(uint32_t x) { bool ans = plain.removeChecked(x); size_t num_removed = check.erase(x); - assert(ans == (num_removed == 1)); + assert_true(ans == (num_removed == 1)); (void)num_removed; // unused besides assert return ans; } @@ -165,19 +165,19 @@ class Roaring { uint32_t maximum() const { uint32_t ans = plain.maximum(); - assert(check.empty() ? ans == 0 : ans == *check.rbegin()); + assert_true(check.empty() ? ans == 0 : ans == *check.rbegin()); return ans; } uint32_t minimum() const { uint32_t ans = plain.minimum(); - assert(check.empty() ? ans == UINT32_MAX : ans == *check.begin()); + assert_true(check.empty() ? ans == UINT32_MAX : ans == *check.begin()); return ans; } bool contains(uint32_t x) const { bool ans = plain.contains(x); - assert(ans == (check.find(x) != check.end())); + assert_true(ans == (check.find(x) != check.end())); return ans; } @@ -186,14 +186,14 @@ class Roaring { auto it = check.find(x); if (x >= y) - assert(ans == true); // roaring says true for this + assert_true(ans == true); // roaring says true for this else if (it == check.end()) - assert(ans == false); // start of range not in set + assert_true(ans == false); // start of range not in set else { uint64_t last = x; // iterate up to y so long as values sequential while (++it != check.end() && last + 1 == *it && *it < y) last = *it; - assert(ans == (last == y - 1)); + assert_true(ans == (last == y - 1)); } return ans; @@ -217,7 +217,7 @@ class Roaring { } ~Roaring() { - assert(does_std_set_match_roaring()); // always check on destructor + assert_true(does_std_set_match_roaring()); // always check on destructor } Roaring &operator=(const Roaring &r) { @@ -295,19 
+295,19 @@ class Roaring { uint64_t cardinality() const { uint64_t ans = plain.cardinality(); - assert(ans == check.size()); + assert_true(ans == check.size()); return ans; } bool isEmpty() const { bool ans = plain.isEmpty(); - assert(ans == check.empty()); + assert_true(ans == check.empty()); return ans; } bool isSubset(const Roaring &r) const { // is `this` subset of `r`? bool ans = plain.isSubset(r.plain); - assert(ans == std::includes( + assert_true(ans == std::includes( r.check.begin(), r.check.end(), // containing range check.begin(), check.end() // range to test for containment )); @@ -316,7 +316,7 @@ class Roaring { bool isStrictSubset(const Roaring &r) const { // is `this` subset of `r`? bool ans = plain.isStrictSubset(r.plain); - assert(ans == (std::includes( + assert_true(ans == (std::includes( r.check.begin(), r.check.end(), // containing range check.begin(), check.end() // range to test for containment ) && r.check.size() > check.size())); @@ -335,7 +335,7 @@ class Roaring { bool operator==(const Roaring &r) const { bool ans = (plain == r.plain); - assert(ans == (check == r.check)); + assert_true(ans == (check == r.check)); return ans; } @@ -370,7 +370,7 @@ class Roaring { void iterate(roaring::api::roaring_iterator iterator, void *ptr) const { plain.iterate(iterator, ptr); - assert(does_std_set_match_roaring()); // checks equivalent iteration + assert_true(does_std_set_match_roaring()); // checks equivalent iteration } bool select(uint32_t rnk, uint32_t *element) const { @@ -380,7 +380,7 @@ class Roaring { auto it_end = check.end(); for (uint32_t i = 0; it != it_end && i < rnk; ++i) ++it; - assert(ans == (it != it_end) && (ans ? *it == *element : true)); + assert_true(ans == (it != it_end) && (ans ? 
*it == *element : true)); return ans; } @@ -393,9 +393,9 @@ class Roaring { auto r_it = r.check.begin(); auto r_it_end = r.check.end(); if (it == it_end || r_it == r_it_end) { - assert(ans == 0); // if either is empty then no intersection + assert_true(ans == 0); // if either is empty then no intersection } else if (*it > *r.check.rbegin() || *r_it > *check.rbegin()) { - assert(ans == 0); // obvious disjoint + assert_true(ans == 0); // obvious disjoint } else { // may overlap uint64_t count = 0; while (it != it_end && r_it != r_it_end) { @@ -403,7 +403,7 @@ class Roaring { else if (*it < *r_it) { ++it; } else { ++r_it; } } - assert(ans == count); + assert_true(ans == count); } return ans; @@ -417,15 +417,15 @@ class Roaring { auto r_it = r.check.begin(); auto r_it_end = r.check.end(); if (it == it_end || r_it == r_it_end) { - assert(ans == false); // if either are empty, no intersection + assert_true(ans == false); // if either are empty, no intersection } else if (*it > *r.check.rbegin() || *r_it > *check.rbegin()) { - assert(ans == false); // obvious disjoint + assert_true(ans == false); // obvious disjoint } else while (it != it_end && r_it != r_it_end) { // may overlap - if (*it == *r_it) { assert(ans == true); goto done; } // overlap + if (*it == *r_it) { assert_true(ans == true); goto done; } // overlap else if (*it < *r_it) { ++it; } else { ++r_it; } } - assert(ans == false); + assert_true(ans == false); done: // (could use lambda vs goto, but debug step in lambdas is poor) return ans; @@ -444,10 +444,10 @@ class Roaring { auto it_end = check.end(); auto r_it = r.check.begin(); auto r_it_end = r.check.end(); - if (it == it_end) { assert(ans == r.check.size()); } // this empty - else if (r_it == r_it_end) { assert(ans == check.size()); } // r empty + if (it == it_end) { assert_true(ans == r.check.size()); } // this empty + else if (r_it == r_it_end) { assert_true(ans == check.size()); } // r empty else if (*it > *r.check.rbegin() || *r_it > *check.rbegin()) { 
- assert(ans == check.size() + r.check.size()); // obvious disjoint + assert_true(ans == check.size() + r.check.size()); // obvious disjoint } else { uint64_t count = 0; while (it != it_end || r_it != r_it_end) { @@ -458,7 +458,7 @@ class Roaring { else if (*it < *r_it) { ++it; } else { ++r_it; } } - assert(ans == count); + assert_true(ans == count); } return ans; @@ -471,10 +471,10 @@ class Roaring { auto it_end = check.end(); auto r_it = r.check.begin(); auto r_it_end = r.check.end(); - if (it == it_end) { assert(ans == 0); } // this empty - else if (r_it == r_it_end) { assert(ans == check.size()); } // r empty + if (it == it_end) { assert_true(ans == 0); } // this empty + else if (r_it == r_it_end) { assert_true(ans == check.size()); } // r empty else if (*it > *r.check.rbegin() || *r_it > *check.rbegin()) { - assert(ans == check.size()); // disjoint so nothing removed + assert_true(ans == check.size()); // disjoint so nothing removed } else { // may overlap uint64_t count = check.size(); // start with cardinality of this while (it != it_end && r_it != r_it_end) { @@ -482,7 +482,7 @@ class Roaring { else if (*it < *r_it) { ++it; } else { ++r_it; } } - assert(ans == count); + assert_true(ans == count); } return ans; @@ -495,10 +495,10 @@ class Roaring { auto it_end = check.end(); auto r_it = r.check.begin(); auto r_it_end = r.check.end(); - if (it == it_end) { assert(ans == r.check.size()); } // this empty - else if (r_it == r_it_end) { assert(ans == check.size()); } // r empty + if (it == it_end) { assert_true(ans == r.check.size()); } // this empty + else if (r_it == r_it_end) { assert_true(ans == check.size()); } // r empty else if (*it > *r.check.rbegin() || *r_it > *check.rbegin()) { - assert(ans == check.size() + r.check.size()); // obvious disjoint + assert_true(ans == check.size() + r.check.size()); // obvious disjoint } else { // may overlap uint64_t count = 0; while (it != it_end || r_it != r_it_end) { @@ -508,7 +508,7 @@ class Roaring { else if (*it < 
*r_it) { ++count; ++it; } else { ++count; ++r_it; } } - assert(ans == count); + assert_true(ans == count); } return ans; @@ -522,7 +522,7 @@ class Roaring { auto it_end = check.end(); for (; it != it_end && *it <= x; ++it) ++count; - assert(ans == count); + assert_true(ans == count); return ans; } @@ -549,7 +549,7 @@ class Roaring { Roaring ans(plain & o.plain); Roaring inplace(*this); - assert(ans == (inplace &= o)); // validate against in-place version + assert_true(ans == (inplace &= o)); // validate against in-place version return ans; } @@ -558,7 +558,7 @@ class Roaring { Roaring ans(plain - o.plain); Roaring inplace(*this); - assert(ans == (inplace -= o)); // validate against in-place version + assert_true(ans == (inplace -= o)); // validate against in-place version return ans; } @@ -567,7 +567,7 @@ class Roaring { Roaring ans(plain | o.plain); Roaring inplace(*this); - assert(ans == (inplace |= o)); // validate against in-place version + assert_true(ans == (inplace |= o)); // validate against in-place version return ans; } @@ -576,7 +576,7 @@ class Roaring { Roaring ans(plain ^ o.plain); Roaring inplace(*this); - assert(ans == (inplace ^= o)); // validate against in-place version + assert_true(ans == (inplace ^= o)); // validate against in-place version return ans; } @@ -605,12 +605,12 @@ class Roaring { delete[] plain_inputs; if (n == 0) - assert(ans.cardinality() == 0); + assert_true(ans.cardinality() == 0); else { Roaring temp = *inputs[0]; for (size_t i = 1; i < n; ++i) temp |= *inputs[i]; - assert(temp == ans); + assert_true(temp == ans); } return ans; From bc51a40aa276053fef531ff45f47c4f548f84986 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 5 Jan 2023 22:45:26 -0500 Subject: [PATCH 045/162] Updating version. 
--- CMakeLists.txt | 4 ++-- include/roaring/roaring_version.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0c779b62a..4ff34ab82 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,8 +18,8 @@ endif() set(ROARING_LIB_NAME roaring) set(PROJECT_VERSION_MAJOR 0) set(PROJECT_VERSION_MINOR 8) -set(PROJECT_VERSION_PATCH 0) -set(ROARING_LIB_VERSION "0.8.0" CACHE STRING "Roaring library version") +set(PROJECT_VERSION_PATCH 1) +set(ROARING_LIB_VERSION "0.8.1" CACHE STRING "Roaring library version") set(ROARING_LIB_SOVERSION "6" CACHE STRING "Roaring library soversion") option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON) diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h index ff5a9f5df..df6a1c90f 100644 --- a/include/roaring/roaring_version.h +++ b/include/roaring/roaring_version.h @@ -1,10 +1,10 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "0.8.0" +#define ROARING_VERSION "0.8.1" enum { ROARING_VERSION_MAJOR = 0, ROARING_VERSION_MINOR = 8, - ROARING_VERSION_REVISION = 0 + ROARING_VERSION_REVISION = 1 }; #endif // ROARING_INCLUDE_ROARING_VERSION From 7c787c66db8baf344e8fb7e5acce68c938ac224a Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 5 Jan 2023 22:48:11 -0500 Subject: [PATCH 046/162] Minor fix --- include/roaring/portability.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/roaring/portability.h b/include/roaring/portability.h index 5b0a942ee..239c056e9 100644 --- a/include/roaring/portability.h +++ b/include/roaring/portability.h @@ -232,7 +232,7 @@ inline int __builtin_clzll(unsigned long long input_num) { #ifdef USENEON // we can always compute the popcount fast. 
-#elif (defined(_M_ARM) || defined(_M_ARM64)) && (defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO) +#elif (defined(_M_ARM) || defined(_M_ARM64)) && ((defined(_WIN64) || defined(_WIN32)) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO) // we will need this function: static inline int hammingbackup(uint64_t x) { uint64_t c1 = UINT64_C(0x5555555555555555); From a44caa798052b66b2c3286eaa0dfe34e9014583a Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 23 Jan 2023 23:13:42 -0500 Subject: [PATCH 047/162] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 80c01487b..cd86ddfeb 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# CRoaring [![Build status](https://ci.appveyor.com/api/projects/status/gr4ibsflqs9by1bc/branch/master?svg=true)](https://ci.appveyor.com/project/lemire/croaring/branch/master) [![Build Status](https://cloud.drone.io/api/badges/RoaringBitmap/CRoaring/status.svg)](https://cloud.drone.io/RoaringBitmap/CRoaring) +# CRoaring [![Build status](https://ci.appveyor.com/api/projects/status/gr4ibsflqs9by1bc/branch/master?svg=true)](https://cloud.drone.io/api/badges/RoaringBitmap/CRoaring/status.svg)](https://cloud.drone.io/RoaringBitmap/CRoaring) Portable Roaring bitmaps in C (and C++) with full support for your favorite compiler (GNU GCC, LLVM's clang, Visual Studio). Included in the [Awesome C](https://github.com/kozross/awesome-c) list of open source C software. 
From 66f1bb7b8f00e01930d33bfcc94a7a634df5555e Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 23 Jan 2023 23:14:38 -0500 Subject: [PATCH 048/162] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cd86ddfeb..41aab9b00 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# CRoaring [![Build status](https://ci.appveyor.com/api/projects/status/gr4ibsflqs9by1bc/branch/master?svg=true)](https://cloud.drone.io/api/badges/RoaringBitmap/CRoaring/status.svg)](https://cloud.drone.io/RoaringBitmap/CRoaring) +# CRoaring [![Ubuntu-CI](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml/badge.svg)](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml) Portable Roaring bitmaps in C (and C++) with full support for your favorite compiler (GNU GCC, LLVM's clang, Visual Studio). Included in the [Awesome C](https://github.com/kozross/awesome-c) list of open source C software. From 3078dfafc22c9becf94bb3ab90956dedc39d82e1 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 24 Jan 2023 11:18:48 -0500 Subject: [PATCH 049/162] Adding Debug tests to VS. 
--- .github/workflows/vs17-ci.yml | 8 +++++++- .github/workflows/vs17-clang-ci.yml | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/vs17-ci.yml b/.github/workflows/vs17-ci.yml index 8d6e0d65e..1cfc0b652 100644 --- a/.github/workflows/vs17-ci.yml +++ b/.github/workflows/vs17-ci.yml @@ -24,4 +24,10 @@ jobs: - name: Run basic tests run: | cd build - ctest -C Release --output-on-failure + ctest -C Release --output-on-failure + - name: Build Debug + run: cmake --build build --config Debug + - name: Run basic tests in Debug + run: | + cd build + ctest -C Debug --output-on-failure diff --git a/.github/workflows/vs17-clang-ci.yml b/.github/workflows/vs17-clang-ci.yml index 0f258a4dc..9beb6d320 100644 --- a/.github/workflows/vs17-clang-ci.yml +++ b/.github/workflows/vs17-clang-ci.yml @@ -24,4 +24,10 @@ jobs: - name: Run basic tests run: | cd build - ctest -C Release --output-on-failure \ No newline at end of file + ctest -C Release --output-on-failure + - name: Build Debug + run: cmake --build build --config Debug --parallel + - name: Run basic tests in Debug + run: | + cd build + ctest -C Debug --output-on-failure \ No newline at end of file From 5f73c5e8981a979a988cecd5c4a085fbd576081f Mon Sep 17 00:00:00 2001 From: Andreas Garnaes Date: Thu, 26 Jan 2023 04:25:23 +0100 Subject: [PATCH 050/162] Add roaring_bitmap_portable_deserialize_frozen (#421) * Add roaring_bitmap_portable_deserialize_frozen * Add deserialization to real_bitmaps_benchmark * Fix cycle count variable types in real_bitmaps_benchmark * Add comment on unaligned access * Allow unaligned memory access for roaring_bitmap_portable_deserialize_frozen * Add ALLOW_UNALIGNED --- benchmarks/real_bitmaps_benchmark.c | 40 +++++++ include/roaring/containers/array.h | 2 + include/roaring/containers/bitset.h | 1 + include/roaring/containers/run.h | 1 + include/roaring/portability.h | 6 ++ include/roaring/roaring.h | 18 ++++ src/containers/array.c | 1 + src/containers/bitset.c 
| 3 + src/containers/run.c | 2 + src/roaring.c | 155 ++++++++++++++++++++++++++++ tests/toplevel_unit.c | 94 +++++++++++++++++ 11 files changed, 323 insertions(+) diff --git a/benchmarks/real_bitmaps_benchmark.c b/benchmarks/real_bitmaps_benchmark.c index 87840a926..887c9df74 100644 --- a/benchmarks/real_bitmaps_benchmark.c +++ b/benchmarks/real_bitmaps_benchmark.c @@ -174,6 +174,46 @@ int main(int argc, char **argv) { " cycles\n", count, total_count, cycles_final - cycles_start); + uint64_t portable_cycle_count = 0, portable_frozen_cycle_count = 0, + frozen_cycle_count = 0; + for(int i = 0; i < (int)count; i++) { + int size = roaring_bitmap_portable_size_in_bytes(bitmaps[i]); + char *buf = malloc(size); + roaring_bitmap_portable_serialize(bitmaps[i], buf); + + int frozen_size = roaring_bitmap_frozen_size_in_bytes(bitmaps[i]); + char *frozen_buf = roaring_aligned_malloc(32, frozen_size); + roaring_bitmap_frozen_serialize(bitmaps[i], frozen_buf); + + RDTSC_START(cycles_start); + roaring_bitmap_t *r1 = roaring_bitmap_portable_deserialize(buf); + RDTSC_FINAL(cycles_final); + portable_cycle_count += cycles_final - cycles_start; + + RDTSC_START(cycles_start); + roaring_bitmap_t *r2 = roaring_bitmap_portable_deserialize_frozen(buf); + RDTSC_FINAL(cycles_final); + portable_frozen_cycle_count += cycles_final - cycles_start; + + RDTSC_START(cycles_start); + roaring_bitmap_t *r3 = roaring_bitmap_frozen_view(frozen_buf, frozen_size); + RDTSC_FINAL(cycles_final); + frozen_cycle_count += cycles_final - cycles_start; + + roaring_bitmap_free(r1); + roaring_bitmap_free(r2); + roaring_bitmap_free(r3); + free(buf); + roaring_aligned_free(frozen_buf); + } + + printf("Deserializing %zu bitmaps took %" PRIu64 " cycles for portable format\n", + count, portable_cycle_count); + printf("Deserializing %zu bitmaps took %" PRIu64 " cycles for portable frozen format\n", + count, portable_frozen_cycle_count); + printf("Deserializing %zu bitmaps took %" PRIu64 " cycles for frozen format\n", + 
count, frozen_cycle_count); + for (int i = 0; i < (int)count; ++i) { free(numbers[i]); numbers[i] = NULL; // paranoid diff --git a/include/roaring/containers/array.h b/include/roaring/containers/array.h index 3a3fe828a..071b0b25f 100644 --- a/include/roaring/containers/array.h +++ b/include/roaring/containers/array.h @@ -68,6 +68,7 @@ void array_container_free(array_container_t *array); array_container_t *array_container_clone(const array_container_t *src); /* Get the cardinality of `array'. */ +ALLOW_UNALIGNED static inline int array_container_cardinality(const array_container_t *array) { return array->cardinality; } @@ -214,6 +215,7 @@ static inline int32_t array_container_size_in_bytes( /** * Return true if the two arrays have the same content. */ +ALLOW_UNALIGNED static inline bool array_container_equals( const array_container_t *container1, const array_container_t *container2) { diff --git a/include/roaring/containers/bitset.h b/include/roaring/containers/bitset.h index b8c136499..15767a23f 100644 --- a/include/roaring/containers/bitset.h +++ b/include/roaring/containers/bitset.h @@ -232,6 +232,7 @@ static inline bool bitset_container_contains_range(const bitset_container_t *bit } /* Get the number of bits set */ +ALLOW_UNALIGNED static inline int bitset_container_cardinality( const bitset_container_t *bitset) { return bitset->cardinality; diff --git a/include/roaring/containers/run.h b/include/roaring/containers/run.h index 4b01d5987..673fde8a0 100644 --- a/include/roaring/containers/run.h +++ b/include/roaring/containers/run.h @@ -481,6 +481,7 @@ static inline int32_t run_container_size_in_bytes( /** * Return true if the two containers have the same content. 
*/ +ALLOW_UNALIGNED static inline bool run_container_equals(const run_container_t *container1, const run_container_t *container2) { if (container1->n_runs != container2->n_runs) { diff --git a/include/roaring/portability.h b/include/roaring/portability.h index 239c056e9..558cc7582 100644 --- a/include/roaring/portability.h +++ b/include/roaring/portability.h @@ -338,6 +338,12 @@ static inline int hamming(uint64_t x) { #define CROARING_UNTARGET_REGION #endif +// Allow unaligned memory access +#if defined(__GNUC__) || defined(__clang__) +#define ALLOW_UNALIGNED __attribute__((no_sanitize("alignment"))) +#else +#define ALLOW_UNALIGNED +#endif // We need portability.h to be included first, // but we also always want isadetection.h to be diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h index 4283d9a3b..090d63aac 100644 --- a/include/roaring/roaring.h +++ b/include/roaring/roaring.h @@ -519,6 +519,24 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); +/** + * Read bitmap from a serialized buffer. + * In case of failure, NULL is returned. + * + * Bitmap returned by this function can be used in all readonly contexts. + * Bitmap must be freed as usual, by calling roaring_bitmap_free(). + * Underlying buffer must not be freed or modified while it backs any bitmaps. + * + * The function is unsafe in the following ways: + * 1) It may execute unaligned memory accesses. + * 2) A buffer overflow may occure if buf does not point to a valid serialized + * bitmap. + * + * This is meant to be compatible with the Java and Go versions: + * https://github.com/RoaringBitmap/RoaringFormatSpec + */ +roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf); + /** * Check how many bytes would be read (up to maxbytes) at this pointer if there * is a bitmap, returns zero if there is no valid bitmap. 
diff --git a/src/containers/array.c b/src/containers/array.c index b4adc2de9..312f7c6a1 100644 --- a/src/containers/array.c +++ b/src/containers/array.c @@ -376,6 +376,7 @@ void array_container_intersection_inplace(array_container_t *src_1, } } +ALLOW_UNALIGNED int array_container_to_uint32_array(void *vout, const array_container_t *cont, uint32_t base) { int outpos = 0; diff --git a/src/containers/bitset.c b/src/containers/bitset.c index 31421846a..25248e00e 100644 --- a/src/containers/bitset.c +++ b/src/containers/bitset.c @@ -686,6 +686,7 @@ BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64) // clang-format On +ALLOW_UNALIGNED int bitset_container_to_uint32_array( uint32_t *out, const bitset_container_t *bc, @@ -816,6 +817,7 @@ bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, r #ifdef CROARING_IS_X64 CROARING_TARGET_AVX2 +ALLOW_UNALIGNED static inline bool _avx2_bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) { const __m256i *ptr1 = (const __m256i*)container1->words; const __m256i *ptr2 = (const __m256i*)container2->words; @@ -832,6 +834,7 @@ static inline bool _avx2_bitset_container_equals(const bitset_container_t *conta CROARING_UNTARGET_REGION #endif // CROARING_IS_X64 +ALLOW_UNALIGNED bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) { if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) { if(container1->cardinality != container2->cardinality) { diff --git a/src/containers/run.c b/src/containers/run.c index ee8a4bcf3..bde8a5371 100644 --- a/src/containers/run.c +++ b/src/containers/run.c @@ -625,6 +625,7 @@ void run_container_andnot(const run_container_t *src_1, } } +ALLOW_UNALIGNED int run_container_to_uint32_array(void *vout, const run_container_t *cont, uint32_t base) { int outpos = 0; @@ -830,6 +831,7 @@ int run_container_rank(const 
run_container_t *container, uint16_t x) { #ifdef CROARING_IS_X64 CROARING_TARGET_AVX2 +ALLOW_UNALIGNED /* Get the cardinality of `run'. Requires an actual computation. */ static inline int _avx2_run_container_cardinality(const run_container_t *run) { const int32_t n_runs = run->n_runs; diff --git a/src/roaring.c b/src/roaring.c index 7479b4720..64291a639 100644 --- a/src/roaring.c +++ b/src/roaring.c @@ -3194,6 +3194,161 @@ roaring_bitmap_frozen_view(const char *buf, size_t length) { return rb; } +ALLOW_UNALIGNED +roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) { + char *start_of_buf = (char *) buf; + uint32_t cookie; + int32_t num_containers; + uint16_t *descriptive_headers; + uint32_t *offset_headers = NULL; + const char *run_flag_bitset = NULL; + bool hasrun = false; + + // deserialize cookie + memcpy(&cookie, buf, sizeof(uint32_t)); + buf += sizeof(uint32_t); + if (cookie == SERIAL_COOKIE_NO_RUNCONTAINER) { + memcpy(&num_containers, buf, sizeof(int32_t)); + buf += sizeof(int32_t); + descriptive_headers = (uint16_t *) buf; + buf += num_containers * 2 * sizeof(uint16_t); + offset_headers = (uint32_t *) buf; + buf += num_containers * sizeof(uint32_t); + } else if ((cookie & 0xFFFF) == SERIAL_COOKIE) { + num_containers = (cookie >> 16) + 1; + hasrun = true; + int32_t run_flag_bitset_size = (num_containers + 7) / 8; + run_flag_bitset = buf; + buf += run_flag_bitset_size; + descriptive_headers = (uint16_t *) buf; + buf += num_containers * 2 * sizeof(uint16_t); + if(num_containers >= NO_OFFSET_THRESHOLD) { + offset_headers = (uint32_t *) buf; + buf += num_containers * sizeof(uint32_t); + } + } else { + return NULL; + } + + // calculate total size for allocation + int32_t num_bitset_containers = 0; + int32_t num_run_containers = 0; + int32_t num_array_containers = 0; + + for (int32_t i = 0; i < num_containers; i++) { + uint16_t tmp; + memcpy(&tmp, descriptive_headers + 2*i+1, sizeof(tmp)); + uint32_t cardinality = tmp + 1; + bool isbitmap 
= (cardinality > DEFAULT_MAX_SIZE); + bool isrun = false; + if(hasrun) { + if((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) { + isbitmap = false; + isrun = true; + } + } + + if (isbitmap) { + num_bitset_containers++; + } else if (isrun) { + num_run_containers++; + } else { + num_array_containers++; + } + } + + size_t alloc_size = 0; + alloc_size += sizeof(roaring_bitmap_t); + alloc_size += num_containers * sizeof(container_t*); + alloc_size += num_bitset_containers * sizeof(bitset_container_t); + alloc_size += num_run_containers * sizeof(run_container_t); + alloc_size += num_array_containers * sizeof(array_container_t); + alloc_size += num_containers * sizeof(uint16_t); // keys + alloc_size += num_containers * sizeof(uint8_t); // typecodes + + // allocate bitmap and construct containers + char *arena = (char *)roaring_malloc(alloc_size); + if (arena == NULL) { + return NULL; + } + + roaring_bitmap_t *rb = (roaring_bitmap_t *) + arena_alloc(&arena, sizeof(roaring_bitmap_t)); + rb->high_low_container.flags = ROARING_FLAG_FROZEN; + rb->high_low_container.allocation_size = num_containers; + rb->high_low_container.size = num_containers; + rb->high_low_container.containers = + (container_t **)arena_alloc(&arena, + sizeof(container_t*) * num_containers); + + uint16_t *keys = arena_alloc(&arena, num_containers * sizeof(uint16_t)); + uint8_t *typecodes = arena_alloc(&arena, num_containers * sizeof(uint8_t)); + + rb->high_low_container.keys = keys; + rb->high_low_container.typecodes = typecodes; + + for (int32_t i = 0; i < num_containers; i++) { + uint16_t tmp; + memcpy(&tmp, descriptive_headers + 2*i+1, sizeof(tmp)); + int32_t cardinality = tmp + 1; + bool isbitmap = (cardinality > DEFAULT_MAX_SIZE); + bool isrun = false; + if(hasrun) { + if((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) { + isbitmap = false; + isrun = true; + } + } + + keys[i] = descriptive_headers[2*i]; + + if (isbitmap) { + typecodes[i] = BITSET_CONTAINER_TYPE; + bitset_container_t *c = 
arena_alloc(&arena, sizeof(bitset_container_t)); + c->cardinality = cardinality; + if(offset_headers != NULL) { + c->words = (uint64_t *) (start_of_buf + offset_headers[i]); + } else { + c->words = (uint64_t *) buf; + buf += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + } + rb->high_low_container.containers[i] = c; + } else if (isrun) { + typecodes[i] = RUN_CONTAINER_TYPE; + run_container_t *c = arena_alloc(&arena, sizeof(run_container_t)); + c->capacity = cardinality; + uint16_t n_runs; + if(offset_headers != NULL) { + memcpy(&n_runs, start_of_buf + offset_headers[i], sizeof(uint16_t)); + c->n_runs = n_runs; + c->runs = (rle16_t *) (start_of_buf + offset_headers[i] + sizeof(uint16_t)); + } else { + memcpy(&n_runs, buf, sizeof(uint16_t)); + c->n_runs = n_runs; + buf += sizeof(uint16_t); + c->runs = (rle16_t *) buf; + buf += c->n_runs * sizeof(rle16_t); + } + rb->high_low_container.containers[i] = c; + } else { + typecodes[i] = ARRAY_CONTAINER_TYPE; + array_container_t *c = arena_alloc(&arena, sizeof(array_container_t)); + c->cardinality = cardinality; + c->capacity = cardinality; + if(offset_headers != NULL) { + c->array = (uint16_t *) (start_of_buf + offset_headers[i]); + } else { + c->array = (uint16_t *) buf; + buf += cardinality * sizeof(uint16_t); + } + rb->high_low_container.containers[i] = c; + } + } + + return rb; +} + + #ifdef __cplusplus } } } // extern "C" { namespace roaring { #endif diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c index 896cf13b4..4f129870d 100644 --- a/tests/toplevel_unit.c +++ b/tests/toplevel_unit.c @@ -4245,6 +4245,99 @@ DEFINE_TEST(test_frozen_serialization_max_containers) { frozen_serialization_compare(r); } +DEFINE_TEST(test_portable_deserialize_frozen) { + roaring_bitmap_t *r1 = + roaring_bitmap_of(8, 1, 2, 3, 100, 1000, 10000, 1000000, 20000000); + assert_non_null(r1); + + uint32_t serialize_len; + roaring_bitmap_t *r2; + + for (int i = 0, top_val = 384000; i < top_val; i++) + roaring_bitmap_add(r1, 3 * i); + 
+ uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(r1); + char *serialized = (char*)malloc(expectedsize); + serialize_len = roaring_bitmap_portable_serialize(r1, serialized); + assert_int_equal(serialize_len, expectedsize); + r2 = roaring_bitmap_portable_deserialize_frozen(serialized); + assert_non_null(r2); + + uint64_t card1 = roaring_bitmap_get_cardinality(r1); + uint32_t *arr1 = (uint32_t *)malloc(card1 * sizeof(uint32_t)); + roaring_bitmap_to_uint32_array(r1, arr1); + + uint64_t card2 = roaring_bitmap_get_cardinality(r2); + uint32_t *arr2 = (uint32_t *)malloc(card2 * sizeof(uint32_t)); + roaring_bitmap_to_uint32_array(r2, arr2); + + assert_true(array_equals(arr1, card1, arr2, card2)); + assert_true(roaring_bitmap_equals(r1, r2)); + free(arr1); + free(arr2); + free(serialized); + roaring_bitmap_free(r1); + roaring_bitmap_free(r2); + + r1 = roaring_bitmap_of(6, 2946000, 2997491, 10478289, 10490227, 10502444, + 19866827); + expectedsize = roaring_bitmap_portable_size_in_bytes(r1); + serialized = (char*)malloc(expectedsize); + serialize_len = roaring_bitmap_portable_serialize(r1, serialized); + assert_int_equal(serialize_len, expectedsize); + assert_int_equal(serialize_len, expectedsize); + + r2 = roaring_bitmap_portable_deserialize_frozen(serialized); + assert_non_null(r2); + + card1 = roaring_bitmap_get_cardinality(r1); + arr1 = (uint32_t *)malloc(card1 * sizeof(uint32_t)); + roaring_bitmap_to_uint32_array(r1, arr1); + + card2 = roaring_bitmap_get_cardinality(r2); + arr2 = (uint32_t *)malloc(card2 * sizeof(uint32_t)); + roaring_bitmap_to_uint32_array(r2, arr2); + + assert_true(array_equals(arr1, card1, arr2, card2)); + assert_true(roaring_bitmap_equals(r1, r2)); + free(arr1); + free(arr2); + roaring_bitmap_free(r1); + roaring_bitmap_free(r2); + free(serialized); + + r1 = roaring_bitmap_create(); + assert_non_null(r1); + + for (uint32_t k = 100; k < 100000; ++k) { + roaring_bitmap_add(r1, k); + } + + roaring_bitmap_run_optimize(r1); + expectedsize = 
roaring_bitmap_portable_size_in_bytes(r1); + serialized = (char*)malloc(expectedsize); + serialize_len = roaring_bitmap_portable_serialize(r1, serialized); + assert_int_equal(serialize_len, expectedsize); + + r2 = roaring_bitmap_portable_deserialize_frozen(serialized); + assert_non_null(r2); + + card1 = roaring_bitmap_get_cardinality(r1); + arr1 = (uint32_t *)malloc(card1 * sizeof(uint32_t)); + roaring_bitmap_to_uint32_array(r1, arr1); + + card2 = roaring_bitmap_get_cardinality(r2); + arr2 = (uint32_t *)malloc(card2 * sizeof(uint32_t)); + roaring_bitmap_to_uint32_array(r2, arr2); + + assert_true(array_equals(arr1, card1, arr2, card2)); + assert_true(roaring_bitmap_equals(r1, r2)); + free(arr1); + free(arr2); + roaring_bitmap_free(r1); + roaring_bitmap_free(r2); + free(serialized); +} int main() { tellmeall(); @@ -4378,6 +4471,7 @@ int main() { cmocka_unit_test(test_range_cardinality), cmocka_unit_test(test_frozen_serialization), cmocka_unit_test(test_frozen_serialization_max_containers), + cmocka_unit_test(test_portable_deserialize_frozen), }; return cmocka_run_group_tests(tests, NULL, NULL); From 198cac911ef7c141075c719029720df4cab6d463 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 25 Jan 2023 22:35:57 -0500 Subject: [PATCH 051/162] Casting. 
--- tests/cpp_unit.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp index 1ed6c60b2..139249c4b 100644 --- a/tests/cpp_unit.cpp +++ b/tests/cpp_unit.cpp @@ -1870,7 +1870,7 @@ DEFINE_TEST(test_cpp_remove_run_compression) { bool test64Deserialize(const std::string& filename) { std::ifstream in(TEST_DATA_DIR + filename, std::ios::binary); std::vector buf1(std::istreambuf_iterator(in), {}); - printf("Reading %lu bytes\n", buf1.size()); + printf("Reading %lu bytes\n", (unsigned long)buf1.size()); Roaring64Map roaring; #if ROARING_EXCEPTIONS try { From e69217bbd73ff96fa930c1768243a26055da892f Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 25 Jan 2023 22:37:12 -0500 Subject: [PATCH 052/162] Preparing release --- CMakeLists.txt | 8 ++++---- include/roaring/roaring_version.h | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4ff34ab82..210825065 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,10 +17,10 @@ if(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSIO endif() set(ROARING_LIB_NAME roaring) set(PROJECT_VERSION_MAJOR 0) -set(PROJECT_VERSION_MINOR 8) -set(PROJECT_VERSION_PATCH 1) -set(ROARING_LIB_VERSION "0.8.1" CACHE STRING "Roaring library version") -set(ROARING_LIB_SOVERSION "6" CACHE STRING "Roaring library soversion") +set(PROJECT_VERSION_MINOR 9) +set(PROJECT_VERSION_PATCH 0) +set(ROARING_LIB_VERSION "0.9.0" CACHE STRING "Roaring library version") +set(ROARING_LIB_SOVERSION "7" CACHE STRING "Roaring library soversion") option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON) if(NOT ROARING_EXCEPTIONS) diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h index df6a1c90f..b7ad25e23 100644 --- a/include/roaring/roaring_version.h +++ b/include/roaring/roaring_version.h @@ -1,10 +1,10 @@ // /include/roaring/roaring_version.h automatically generated by release.py, do 
not change by hand #ifndef ROARING_INCLUDE_ROARING_VERSION #define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION "0.8.1" +#define ROARING_VERSION "0.9.0" enum { ROARING_VERSION_MAJOR = 0, - ROARING_VERSION_MINOR = 8, - ROARING_VERSION_REVISION = 1 + ROARING_VERSION_MINOR = 9, + ROARING_VERSION_REVISION = 0 }; #endif // ROARING_INCLUDE_ROARING_VERSION From 4d492a7f32e31fbfba1921458f88be8f98db0aa1 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Fri, 27 Jan 2023 19:13:26 -0500 Subject: [PATCH 053/162] Adding 390 runner (#422) * Adding 390 runner * Adding token * Fixing branch * Flushing out big-endian tests. --- .github/workflows/s390x.yml | 30 +++++++++++++++++++++++++ README.md | 5 +++++ benchmarks/real_bitmaps_benchmark.c | 2 +- include/roaring/misc/configreport.h | 8 +++++-- include/roaring/portability.h | 31 ++++++++++++++++++++++++++ include/roaring/roaring.h | 34 ++++++++++++++++++++++++++++- src/roaring_array.c | 4 +++- tests/c_example1.c | 9 +++++++- tests/cpp_example2.cpp | 5 ++++- tests/cpp_unit.cpp | 15 +++++++++++-- tests/format_portability_unit.c | 12 +++++++--- tests/realdata_unit.c | 5 +++++ tests/robust_deserialization_unit.c | 6 ++++- tests/toplevel_unit.c | 10 +++++++++ 14 files changed, 163 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/s390x.yml diff --git a/.github/workflows/s390x.yml b/.github/workflows/s390x.yml new file mode 100644 index 000000000..d49858218 --- /dev/null +++ b/.github/workflows/s390x.yml @@ -0,0 +1,30 @@ +name: Ubuntu s390x (GCC 11) + +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: uraimo/run-on-arch-action@v2 + name: Test + id: runcmd + with: + arch: s390x + githubToken: ${{ github.token }} + distro: ubuntu_latest + install: | + apt-get update -q -y + apt-get install -y cmake make g++ + run: | + cmake -DCMAKE_BUILD_TYPE=Release -B build + cmake --build build -j=2 + ctest 
--output-on-failure --test-dir build + diff --git a/README.md b/README.md index 41aab9b00..30488508a 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,11 @@ of the latest hardware. Roaring bitmaps are already available on a variety of pl - CMake (to contribute to the project, users can rely on amalgamation/unity builds if they do not wish to use CMake). - Under x64 systems, the library provides runtime dispatch so that optimized functions are called based on the detected CPU features. It works with GCC, clang (version 9 and up) and Visual Studio (2017 and up). Other systems (e.g., ARM) do not need runtime dispatch. +Hardly anyone has access to an actual big-endian system. Nevertheless, +we support big-endian systems such as IBM s390x through emulators---except for +IO serialization which is only supported on little-endian systems (see [issue 423](https://github.com/RoaringBitmap/CRoaring/issues/423)). + + # Using as a CMake dependency If you like CMake, you can just a few lines in you `CMakeLists.txt` file to grab a `CRoaring` release. [See our demonstration for further details](https://github.com/RoaringBitmap/croaring_cmake_demo_single_file). 
diff --git a/benchmarks/real_bitmaps_benchmark.c b/benchmarks/real_bitmaps_benchmark.c index 887c9df74..7d8c4847e 100644 --- a/benchmarks/real_bitmaps_benchmark.c +++ b/benchmarks/real_bitmaps_benchmark.c @@ -196,7 +196,7 @@ int main(int argc, char **argv) { portable_frozen_cycle_count += cycles_final - cycles_start; RDTSC_START(cycles_start); - roaring_bitmap_t *r3 = roaring_bitmap_frozen_view(frozen_buf, frozen_size); + const roaring_bitmap_t *r3 = roaring_bitmap_frozen_view(frozen_buf, frozen_size); RDTSC_FINAL(cycles_final); frozen_cycle_count += cycles_final - cycles_start; diff --git a/include/roaring/misc/configreport.h b/include/roaring/misc/configreport.h index 05b214505..a6a64285e 100644 --- a/include/roaring/misc/configreport.h +++ b/include/roaring/misc/configreport.h @@ -11,7 +11,6 @@ #include #include - #ifdef __cplusplus extern "C" { namespace roaring { namespace misc { #endif @@ -118,8 +117,10 @@ static inline const char *guessprocessor() { } static inline void tellmeall() { +#if CROARING_IS_BIG_ENDIAN + printf("big-endian system detected\n")); +#endif printf("x64 processor: %s\t", guessprocessor()); - #ifdef __VERSION__ printf(" compiler version: %s\t", __VERSION__); #endif @@ -170,6 +171,9 @@ static inline void tellmeall() { #else static inline void tellmeall() { +#if CROARING_IS_BIG_ENDIAN + printf("big-endian system detected\n"); +#endif printf("Non-X64 processor\n"); #ifdef __arm__ printf("ARM processor detected\n"); diff --git a/include/roaring/portability.h b/include/roaring/portability.h index 558cc7582..15577c84a 100644 --- a/include/roaring/portability.h +++ b/include/roaring/portability.h @@ -345,6 +345,37 @@ static inline int hamming(uint64_t x) { #define ALLOW_UNALIGNED #endif +#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) + #define CROARING_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) + #elif defined(_WIN32) + #define CROARING_IS_BIG_ENDIAN 0 + #else + #if defined(__APPLE__) || defined(__FreeBSD__) // defined 
__BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ + #include + #elif defined(sun) || defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__) + #include + #else // defined(__APPLE__) || defined(__FreeBSD__) + + #ifdef __has_include + #if __has_include() + #include + #endif //__has_include() + #endif //__has_include + + #endif // defined(__APPLE__) || defined(__FreeBSD__) + + + #ifndef !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) + #define CROARING_IS_BIG_ENDIAN 0 + #endif + + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + #define CROARING_IS_BIG_ENDIAN 0 + #else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + #define CROARING_IS_BIG_ENDIAN 1 + #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#endif + // We need portability.h to be included first, // but we also always want isadetection.h to be // included (right after). diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h index 090d63aac..7b52da6d6 100644 --- a/include/roaring/roaring.h +++ b/include/roaring/roaring.h @@ -479,6 +479,10 @@ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); * more space efficient than the portable form, e.g. when the data is sparse. * * Returns how many bytes written, should be `roaring_bitmap_size_in_bytes(r)`. + * + * This function is endian-sensitive: big-endian systems will not be able toCROARING_ + * reading from big-endian systems, etc. We assume that you have a little-endian + * system. */ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf); @@ -486,7 +490,11 @@ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf); * Use with `roaring_bitmap_serialize()`. * * (See `roaring_bitmap_portable_deserialize()` if you want a format that's - * compatible with Java and Go implementations) + * compatible with Java and Go implementations). + * + * This function is endian-sensitive: big-endian systems will not be able toCROARING_ + * reading from big-endian systems, etc. 
We assume that you have a little-endian + * system. */ roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf); @@ -506,6 +514,10 @@ size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r); * * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec +* + * This function is endian-sensitive: big-endian systems will not be able toCROARING_ + * reading from big-endian systems, etc. We assume that you have a little-endian + * system. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); @@ -515,6 +527,10 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); * * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec + * + * This function is endian-sensitive: big-endian systems will not be able toCROARING_ + * reading from big-endian systems, etc. We assume that you have a little-endian + * system. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); @@ -534,6 +550,10 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, * * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec + * + * This function is endian-sensitive: big-endian systems will not be able toCROARING_ + * reading from big-endian systems, etc. We assume that you have a little-endian + * system. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf); @@ -564,6 +584,10 @@ size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r); * * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec + * + * This function is endian-sensitive: big-endian systems will not be able toCROARING_ + * reading from big-endian systems, etc. We assume that you have a little-endian + * system. 
*/ size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf); @@ -594,6 +618,10 @@ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r); /** * Serializes bitmap using frozen format. * Buffer size must be at least roaring_bitmap_frozen_size_in_bytes(). + * + * This function is endian-sensitive: big-endian systems will not be able toCROARING_ + * reading from big-endian systems, etc. We assume that you have a little-endian + * system. */ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf); @@ -607,6 +635,10 @@ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf); * Bitmap returned by this function can be used in all readonly contexts. * Bitmap must be freed as usual, by calling roaring_bitmap_free(). * Underlying buffer must not be freed or modified while it backs any bitmaps. + * + * This function is endian-sensitive: big-endian systems will not be able toCROARING_ + * reading from big-endian systems, etc. We assume that you have a little-endian + * system. */ const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf, size_t length); diff --git a/src/roaring_array.c b/src/roaring_array.c index 5151e7062..bfcf6f7b2 100644 --- a/src/roaring_array.c +++ b/src/roaring_array.c @@ -542,6 +542,7 @@ size_t ra_portable_size_in_bytes(const roaring_array_t *ra) { return count; } +// This function is endian-sensitive: big-endian systems will not be able to reading from big-endian systems, etc. size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) { char *initbuf = buf; uint32_t startOffset = 0; @@ -690,10 +691,11 @@ size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) { return bytestotal; } - // this function populates answer from the content of buf (reading up to maxbytes bytes). // The function returns false if a properly serialized bitmap cannot be found. 
// if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes. +// +// This function is endian-sensitive: big-endian systems will not be able to reading from big-endian systems, etc. bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) { *readbytes = sizeof(int32_t);// for cookie if(*readbytes > maxbytes) { diff --git a/tests/c_example1.c b/tests/c_example1.c index a0d3480af..0765c1b22 100644 --- a/tests/c_example1.c +++ b/tests/c_example1.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -10,6 +11,7 @@ bool roaring_iterator_sumall(uint32_t value, void *param) { } int main() { + tellmeall(); // create a new empty bitmap roaring_bitmap_t *r1 = roaring_bitmap_create(); // then we can add values @@ -27,7 +29,11 @@ int main() { uint32_t expectedsizerun = roaring_bitmap_portable_size_in_bytes(r1); printf("size before run optimize %d bytes, and after %d bytes\n", expectedsizebasic, expectedsizerun); - +#if CROARING_IS_BIG_ENDIAN + printf("we omit serialization tests because you have a big endian system.\n"); + roaring_bitmap_free(r1); + return EXIT_SUCCESS; +#else // create a new bitmap containing the values {1,2,3,5,6} roaring_bitmap_t *r2 = roaring_bitmap_of(5, 1, 2, 3, 5, 6); roaring_bitmap_printf(r2); // print it @@ -137,4 +143,5 @@ int main() { roaring_bitmap_free(r2); roaring_bitmap_free(r3); return EXIT_SUCCESS; +#endif } \ No newline at end of file diff --git a/tests/cpp_example2.cpp b/tests/cpp_example2.cpp index bea031345..8e2b3cff6 100644 --- a/tests/cpp_example2.cpp +++ b/tests/cpp_example2.cpp @@ -66,6 +66,9 @@ int main() { // we can compute intersection two-by-two Roaring i1_2 = r1 & r2; +#if CROARING_IS_BIG_ENDIAN + printf("We omit serialization tests because you have a big endian system.\n"); +#else // we can write a bitmap to a pointer and recover it later uint32_t expectedsize = r1.getSizeInBytes(); char *serializedbytes = new 
char[expectedsize]; @@ -89,7 +92,7 @@ int main() { ++counter; } // counter == t.cardinality() - +#endif // we can move iterators to skip values const uint32_t manyvalues[] = {2, 3, 4, 7, 8}; Roaring rogue(5, manyvalues); diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp index 139249c4b..b1b50ac58 100644 --- a/tests/cpp_unit.cpp +++ b/tests/cpp_unit.cpp @@ -673,9 +673,11 @@ DEFINE_TEST(test_example_cpp_true) { test_example_cpp(true); } DEFINE_TEST(test_example_cpp_false) { test_example_cpp(false); } +#if !CROARING_IS_BIG_ENDIAN DEFINE_TEST(test_example_cpp_64_true) { test_example_cpp_64(true); } DEFINE_TEST(test_example_cpp_64_false) { test_example_cpp_64(false); } +#endif DEFINE_TEST(test_run_compression_cpp_64_true) { test_run_compression_cpp_64(true); @@ -1868,6 +1870,10 @@ DEFINE_TEST(test_cpp_remove_run_compression) { // Returns true on success, false on exception. bool test64Deserialize(const std::string& filename) { +#if CROARING_IS_BIG_ENDIAN + (void)filename; + printf("Big-endian IO unsupported.\n"); +#else // CROARING_IS_BIG_ENDIAN std::ifstream in(TEST_DATA_DIR + filename, std::ios::binary); std::vector buf1(std::istreambuf_iterator(in), {}); printf("Reading %lu bytes\n", (unsigned long)buf1.size()); @@ -1878,15 +1884,16 @@ bool test64Deserialize(const std::string& filename) { } catch (...) 
{ return false; } -#else +#else // ROARING_EXCEPTIONS roaring = Roaring64Map::readSafe(buf1.data(), buf1.size()); -#endif +#endif // ROARING_EXCEPTIONS std::vector buf2(roaring.getSizeInBytes()); assert_true(buf1.size() == buf2.size()); assert_true(roaring.write(buf2.data()) == buf2.size()); for (size_t i = 0; i < buf1.size(); ++i) { assert_true(buf1[i] == buf2[i]); } +#endif // CROARING_IS_BIG_ENDIAN return true; } @@ -1940,12 +1947,14 @@ int main() { cmocka_unit_test(test_bitmap_of_32), cmocka_unit_test(test_bitmap_of_64), cmocka_unit_test(serial_test), +#if !CROARING_IS_BIG_ENDIAN cmocka_unit_test(test_example_true), cmocka_unit_test(test_example_false), cmocka_unit_test(test_example_cpp_true), cmocka_unit_test(test_example_cpp_false), cmocka_unit_test(test_example_cpp_64_true), cmocka_unit_test(test_example_cpp_64_false), +#endif cmocka_unit_test(test_cpp_add_remove_checked), cmocka_unit_test(test_cpp_add_remove_checked_64), cmocka_unit_test(test_cpp_add_range), @@ -1980,6 +1989,7 @@ int main() { cmocka_unit_test(test_cpp_flip_64), cmocka_unit_test(test_cpp_flip_closed_64), cmocka_unit_test(test_combinatoric_flip_many_64), +#if !CROARING_IS_BIG_ENDIAN cmocka_unit_test(test_cpp_deserialize_64_empty), cmocka_unit_test(test_cpp_deserialize_64_32bit_vals), cmocka_unit_test(test_cpp_deserialize_64_spread_vals), @@ -1990,6 +2000,7 @@ int main() { cmocka_unit_test(test_cpp_deserialize_64_invalid_size), cmocka_unit_test(test_cpp_deserialize_64_key_too_small), #endif +#endif // !CROARING_IS_BIG_ENDIAN cmocka_unit_test(issue316), cmocka_unit_test(test_issue304), cmocka_unit_test(issue_336), diff --git a/tests/format_portability_unit.c b/tests/format_portability_unit.c index a08584688..a823a9b64 100644 --- a/tests/format_portability_unit.c +++ b/tests/format_portability_unit.c @@ -74,7 +74,9 @@ void test_deserialize(char* filename) { free(input_buffer); roaring_bitmap_free(bitmap); } - +#if CROARING_IS_BIG_ENDIAN +// port the test below. 
+#else DEFINE_TEST(test_deserialize_portable_norun) { char filename[1024]; @@ -92,14 +94,18 @@ DEFINE_TEST(test_deserialize_portable_wrun) { test_deserialize(filename); } +#endif int main() { tellmeall(); - +#if CROARING_IS_BIG_ENDIAN + printf("Big-endian IO unsupported.\n"); + return EXIT_SUCCESS; +#else const struct CMUnitTest tests[] = { cmocka_unit_test(test_deserialize_portable_norun), cmocka_unit_test(test_deserialize_portable_wrun), }; - return cmocka_run_group_tests(tests, NULL, NULL); +#endif } diff --git a/tests/realdata_unit.c b/tests/realdata_unit.c index 1d37a3d18..e6e1c7388 100644 --- a/tests/realdata_unit.c +++ b/tests/realdata_unit.c @@ -44,6 +44,10 @@ const char *datadir[] = { "weather_sept_85_srt", "wikileaks-noquotes", "wikileaks-noquotes_srt"}; bool serialize_correctly(roaring_bitmap_t *r) { +#if CROARING_IS_BIG_ENDIAN + (void)r; + return r; +#else uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(r); char *serialized = (char*)malloc(expectedsize); if (serialized == NULL) { @@ -70,6 +74,7 @@ bool serialize_correctly(roaring_bitmap_t *r) { } roaring_bitmap_free(r2); return true; +#endif } // arrays expected to both be sorted. 
diff --git a/tests/robust_deserialization_unit.c b/tests/robust_deserialization_unit.c index ee6750296..24467a99b 100644 --- a/tests/robust_deserialization_unit.c +++ b/tests/robust_deserialization_unit.c @@ -165,7 +165,10 @@ DEFINE_TEST(test_robust_deserialize7) { int main() { tellmeall(); - +#if CROARING_IS_BIG_ENDIAN + printf("Big-endian IO unsupported.\n"); + return EXIT_SUCCESS; +#else const struct CMUnitTest tests[] = { cmocka_unit_test(test_robust_deserialize1), cmocka_unit_test(test_robust_deserialize2), @@ -177,4 +180,5 @@ int main() { }; return cmocka_run_group_tests(tests, NULL, NULL); +#endif } diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c index 4f129870d..1a1e8ab17 100644 --- a/tests/toplevel_unit.c +++ b/tests/toplevel_unit.c @@ -163,6 +163,7 @@ bool check_serialization(roaring_bitmap_t *bitmap) { return ret; } +#if !CROARING_IS_BIG_ENDIAN DEFINE_TEST(issue245) { roaring_bitmap_t *bitmap = roaring_bitmap_create(); const uint32_t targetEntries = 2048; @@ -191,6 +192,7 @@ DEFINE_TEST(issue245) { } roaring_bitmap_free(bitmap); } +#endif DEFINE_TEST(issue208) { roaring_bitmap_t *r = roaring_bitmap_create(); @@ -4346,7 +4348,9 @@ int main() { cmocka_unit_test(test_contains_range_PyRoaringBitMap_issue81), cmocka_unit_test(issue316), cmocka_unit_test(issue288), +#if !CROARING_IS_BIG_ENDIAN cmocka_unit_test(issue245), +#endif cmocka_unit_test(issue208), cmocka_unit_test(issue208b), cmocka_unit_test(range_contains), @@ -4364,8 +4368,10 @@ int main() { cmocka_unit_test(test_stress_memory_false), cmocka_unit_test(check_interval), cmocka_unit_test(test_uint32_iterator_true), +#if !CROARING_IS_BIG_ENDIAN cmocka_unit_test(test_example_true), cmocka_unit_test(test_example_false), +#endif cmocka_unit_test(test_clear), cmocka_unit_test(can_copy_empty_true), cmocka_unit_test(can_copy_empty_false), @@ -4395,8 +4401,10 @@ int main() { cmocka_unit_test(test_iterate_empty), cmocka_unit_test(test_iterate_withbitmap), cmocka_unit_test(test_iterate_withrun), +#if 
!CROARING_IS_BIG_ENDIAN cmocka_unit_test(test_serialize), cmocka_unit_test(test_portable_serialize), +#endif cmocka_unit_test(test_add), cmocka_unit_test(test_add_checked), cmocka_unit_test(test_remove_checked), @@ -4469,9 +4477,11 @@ int main() { cmocka_unit_test(test_remove_range), cmocka_unit_test(test_remove_many), cmocka_unit_test(test_range_cardinality), +#if !CROARING_IS_BIG_ENDIAN cmocka_unit_test(test_frozen_serialization), cmocka_unit_test(test_frozen_serialization_max_containers), cmocka_unit_test(test_portable_deserialize_frozen), +#endif }; return cmocka_run_group_tests(tests, NULL, NULL); From 407467cc1eee278950b1b507223a0651991666cf Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Sat, 28 Jan 2023 20:46:38 -0500 Subject: [PATCH 054/162] Dlemire/390 (#424) * Adding 390 runner * Adding token * Fixing branch * Flushing out big-endian tests. * Minor fixes. --- src/containers/run.c | 7 +++++-- tests/c_example1.c | 9 ++------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/containers/run.c b/src/containers/run.c index bde8a5371..a7e4ab0b7 100644 --- a/src/containers/run.c +++ b/src/containers/run.c @@ -675,7 +675,8 @@ void run_container_printf_as_uint32_array(const run_container_t *cont, } int32_t run_container_write(const run_container_t *container, char *buf) { - memcpy(buf, &container->n_runs, sizeof(uint16_t)); + uint16_t cast_16 = container->n_runs; + memcpy(buf, &cast_16, sizeof(uint16_t)); memcpy(buf + sizeof(uint16_t), container->runs, container->n_runs * sizeof(rle16_t)); return run_container_size_in_bytes(container); @@ -684,7 +685,9 @@ int32_t run_container_write(const run_container_t *container, char *buf) { int32_t run_container_read(int32_t cardinality, run_container_t *container, const char *buf) { (void)cardinality; - memcpy(&container->n_runs, buf, sizeof(uint16_t)); + uint16_t cast_16; + memcpy(&cast_16, buf, sizeof(uint16_t)); + container->n_runs = cast_16; if (container->n_runs > container->capacity) 
run_container_grow(container, container->n_runs, false); if(container->n_runs > 0) { diff --git a/tests/c_example1.c b/tests/c_example1.c index 0765c1b22..7e92065f2 100644 --- a/tests/c_example1.c +++ b/tests/c_example1.c @@ -29,11 +29,6 @@ int main() { uint32_t expectedsizerun = roaring_bitmap_portable_size_in_bytes(r1); printf("size before run optimize %d bytes, and after %d bytes\n", expectedsizebasic, expectedsizerun); -#if CROARING_IS_BIG_ENDIAN - printf("we omit serialization tests because you have a big endian system.\n"); - roaring_bitmap_free(r1); - return EXIT_SUCCESS; -#else // create a new bitmap containing the values {1,2,3,5,6} roaring_bitmap_t *r2 = roaring_bitmap_of(5, 1, 2, 3, 5, 6); roaring_bitmap_printf(r2); // print it @@ -98,7 +93,7 @@ int main() { // reading it size_t sizeofbitmap = roaring_bitmap_portable_deserialize_size(serializedbytes, expectedsize); - printf("sizeofbitmap = %zu \n", sizeofbitmap); + printf("\nsizeofbitmap = %zu \n", sizeofbitmap); assert_true(sizeofbitmap == expectedsize); // sizeofbitmap would be zero if no bitmap were found // we can also read the bitmap "safely" by specifying a byte size limit: @@ -142,6 +137,6 @@ int main() { roaring_bitmap_free(r1); roaring_bitmap_free(r2); roaring_bitmap_free(r3); + printf("Success.\n"); return EXIT_SUCCESS; -#endif } \ No newline at end of file From 51519e5f4115b90f46d13754e6ebc66c9f7a3b69 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 30 Jan 2023 08:48:39 -0500 Subject: [PATCH 055/162] Correcting bad comments... 
--- include/roaring/roaring.h | 40 ++++++++++++++++----------------------- src/roaring_array.c | 4 ++-- 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h index 7b52da6d6..e4a732136 100644 --- a/include/roaring/roaring.h +++ b/include/roaring/roaring.h @@ -480,9 +480,8 @@ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); * * Returns how many bytes written, should be `roaring_bitmap_size_in_bytes(r)`. * - * This function is endian-sensitive: big-endian systems will not be able toCROARING_ - * reading from big-endian systems, etc. We assume that you have a little-endian - * system. + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf); @@ -492,9 +491,8 @@ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf); * (See `roaring_bitmap_portable_deserialize()` if you want a format that's * compatible with Java and Go implementations). * - * This function is endian-sensitive: big-endian systems will not be able toCROARING_ - * reading from big-endian systems, etc. We assume that you have a little-endian - * system. + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf); @@ -515,9 +513,8 @@ size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r); * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec * - * This function is endian-sensitive: big-endian systems will not be able toCROARING_ - * reading from big-endian systems, etc. We assume that you have a little-endian - * system. 
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); @@ -528,9 +525,8 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec * - * This function is endian-sensitive: big-endian systems will not be able toCROARING_ - * reading from big-endian systems, etc. We assume that you have a little-endian - * system. + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); @@ -551,9 +547,8 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec * - * This function is endian-sensitive: big-endian systems will not be able toCROARING_ - * reading from big-endian systems, etc. We assume that you have a little-endian - * system. + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf); @@ -585,9 +580,8 @@ size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r); * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec * - * This function is endian-sensitive: big-endian systems will not be able toCROARING_ - * reading from big-endian systems, etc. 
We assume that you have a little-endian - * system. + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf); @@ -619,9 +613,8 @@ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r); * Serializes bitmap using frozen format. * Buffer size must be at least roaring_bitmap_frozen_size_in_bytes(). * - * This function is endian-sensitive: big-endian systems will not be able toCROARING_ - * reading from big-endian systems, etc. We assume that you have a little-endian - * system. + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf); @@ -636,9 +629,8 @@ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf); * Bitmap must be freed as usual, by calling roaring_bitmap_free(). * Underlying buffer must not be freed or modified while it backs any bitmaps. * - * This function is endian-sensitive: big-endian systems will not be able toCROARING_ - * reading from big-endian systems, etc. We assume that you have a little-endian - * system. + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. 
*/ const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf, size_t length); diff --git a/src/roaring_array.c b/src/roaring_array.c index bfcf6f7b2..bff88f39b 100644 --- a/src/roaring_array.c +++ b/src/roaring_array.c @@ -542,7 +542,7 @@ size_t ra_portable_size_in_bytes(const roaring_array_t *ra) { return count; } -// This function is endian-sensitive: big-endian systems will not be able to reading from big-endian systems, etc. +// This function is endian-sensitive. size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) { char *initbuf = buf; uint32_t startOffset = 0; @@ -695,7 +695,7 @@ size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) { // The function returns false if a properly serialized bitmap cannot be found. // if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes. // -// This function is endian-sensitive: big-endian systems will not be able to reading from big-endian systems, etc. +// This function is endian-sensitive. bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) { *readbytes = sizeof(int32_t);// for cookie if(*readbytes > maxbytes) { From 9a9c3e979b19c3fbd7b8ab89589be6636c0fc5c6 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 30 Jan 2023 13:23:40 -0500 Subject: [PATCH 056/162] add doxygen documentation for the C++ code... automatically... (#425) * Preparing an automated doxygen run. * Minor update. 
--- .github/workflows/documentation.yml | 34 + doxygen | 2737 +++++++++++++++++++++++++++ tools/prepare_doxygen.sh | 22 + tools/release.py | 5 + 4 files changed, 2798 insertions(+) create mode 100644 .github/workflows/documentation.yml create mode 100644 doxygen create mode 100755 tools/prepare_doxygen.sh diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 000000000..6d59fa4c8 --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,34 @@ +name: Doxygen GitHub Pages + +on: + push: + branches: + - main + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +permissions: + contents: write + pages: write + id-token: write + +jobs: + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Install Doxygen + run: sudo apt-get install doxygen graphviz -y + - run: mkdir docs + - name: Install theme + run: ./tools/prepare_doxygen.sh + - name: Generate Doxygen Documentation + run: doxygen ./doxygen + - name: Deploy to GitHub Pages + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/html \ No newline at end of file diff --git a/doxygen b/doxygen new file mode 100644 index 000000000..768017761 --- /dev/null +++ b/doxygen @@ -0,0 +1,2737 @@ +# Doxyfile 1.9.6 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \").
+# +# Note: +# +# Use doxygen to compare the used configuration file with the template +# configuration file: +# doxygen -x [configFile] +# Use doxygen to compare the used configuration file with the template +# configuration file without replacing the environment variables or CMake type +# replacement variables: +# doxygen -x_noenv [configFile] + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the configuration +# file that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# https://www.gnu.org/software/libiconv/ for the list of possible encodings. +# The default value is: UTF-8. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. + +PROJECT_NAME = "CRoaring" + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. + +PROJECT_NUMBER = "0.9.0" + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer a +# quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = "Roaring bitmaps in C (and C++)" + +# With the PROJECT_LOGO tag one can specify a logo or an icon that is included +# in the documentation. 
The maximum height of the logo should not exceed 55 +# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy +# the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. + +OUTPUT_DIRECTORY = "docs" + +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create up to 4096 +# sub-directories (in 2 levels) under the output directory of each output format +# and will distribute the generated files over these directories. Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise cause +# performance problems for the file system. Adapt CREATE_SUBDIRS_LEVEL to +# control the number of sub-directories. +# The default value is: NO. + +CREATE_SUBDIRS = YES + +# Controls the number of sub-directories that will be created when +# CREATE_SUBDIRS tag is set to YES. Level 0 represents 16 directories, and every +# level increment doubles the number of directories, resulting in 4096 +# directories at level 8 which is the default and also the maximum value. The +# sub-directories are organized in 2 levels, the first level always has a fixed +# number of 16 directories. +# Minimum value: 0, maximum value: 8, default value: 8. +# This tag requires that the tag CREATE_SUBDIRS is set to YES. + +CREATE_SUBDIRS_LEVEL = 8 + +# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII +# characters to appear in the names of generated files. If set to NO, non-ASCII +# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode +# U+3044. +# The default value is: NO.
+ +ALLOW_UNICODE_NAMES = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Bulgarian, +# Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, English +# (United States), Esperanto, Farsi (Persian), Finnish, French, German, Greek, +# Hindi, Hungarian, Indonesian, Italian, Japanese, Japanese-en (Japanese with +# English messages), Korean, Korean-en (Korean with English messages), Latvian, +# Lithuanian, Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, +# Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, +# Swedish, Turkish, Ukrainian and Vietnamese. +# The default value is: English. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief +# description of a member or function before the detailed description +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +# The default value is: YES. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is. 
If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity):The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. + +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path +# before files name in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used +# The default value is: YES. + +FULL_PATH_NAMES = YES + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. +# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. +# This tag requires that the tag FULL_PATH_NAMES is set to YES. 
+ +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used. Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful if your file system doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. + +JAVADOC_AUTOBRIEF = NO + +# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line +# such as +# /*************** +# as being the beginning of a Javadoc-style comment "banner". If set to NO, the +# Javadoc-style will behave just like regular comments and it will not be +# interpreted by doxygen. +# The default value is: NO. + +JAVADOC_BANNER = YES + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO. + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //!
or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description. Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO. + +MULTILINE_CPP_IS_BRIEF = NO + +# By default Python docstrings are displayed as preformatted text and doxygen's +# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the +# doxygen's special commands can be used and the contents of the docstring +# documentation blocks is shown as doxygen documentation. +# The default value is: YES. + +PYTHON_DOCSTRING = YES + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new +# page for each member. If set to NO, the documentation of a member will be part +# of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 2 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:^^" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". Note that you cannot put \n's in the value part of an alias +# to insert newlines (in the resulting output). 
You can put ^^ in the value part +# of an alias to insert a newline as if a physical newline was in the original +# file. When you need a literal { or } or , in the value part of an alias you +# have to escape them by means of a backslash (\), this can lead to conflicts +# with the commands \{ and \} for these it is advised to use the version @{ and +# @} or use a double escape (\\{ and \\}) + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice +# sources only. Doxygen will then generate output that is more tailored for that +# language. For instance, namespaces will be presented as modules, types will be +# separated into more groups, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_SLICE = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. 
With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, JavaScript, +# Csharp (C#), C, C++, Lex, D, PHP, md (Markdown), Objective-C, Python, Slice, +# VHDL, Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: +# FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser +# tries to guess whether the code is fixed or free formatted code, this is the +# default for Fortran type files). For instance to make doxygen treat .inc files +# as Fortran files (default is PHP), and .f files as C (default is Fortran), +# use: inc=Fortran f=C. +# +# Note: For files without extension you can use no_extension as a placeholder. +# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen. When specifying no_extension you should add +# * to the FILE_PATTERNS. +# +# Note see also the list of default file extension mappings. + +EXTENSION_MAPPING = + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See https://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibility issues. +# The default value is: YES. + +MARKDOWN_SUPPORT = YES + +# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up +# to that level are automatically included in the table of contents, even if +# they do not have an id attribute. +# Note: This feature currently applies only to Markdown headings. +# Minimum value: 0, maximum value: 99, default value: 5.
+# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. + +TOC_INCLUDE_HEADINGS = 5 + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by putting a % sign in front of the word or +# globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES. + +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match function declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. +# The default value is: NO. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. Setting this option to YES will make +# doxygen replace the get and set methods by a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type. If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES.
+ +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO. + +DISTRIBUTE_GROUP_DOC = YES + +# If one adds a struct or class to a group and this option is enabled, then also +# any nested class or struct is added to the same group. By default this option +# is disabled and one has to add nested compounds explicitly via \ingroup. +# The default value is: NO. + +GROUP_NESTED_COMPOUNDS = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. +# The default value is: NO. + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. 
+ +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. + +LOOKUP_CACHE_SIZE = 0 + +# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use +# during processing. When set to 0 doxygen will base this on the number of +# cores available in the system. You can set it explicitly to a value larger +# than 0 to get more control over the balance between CPU load and processing +# speed. At this moment only the input processing can be done using multiple +# threads. Since this is still an experimental feature the default is set to 1, +# which effectively disables parallel processing. Please report any issues you +# encounter.
Generating dot graphs in parallel is controlled by the +# DOT_NUM_THREADS setting. +# Minimum value: 0, maximum value: 32, default value: 1. + +NUM_PROC_THREADS = 1 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in +# documentation are documented, even if no documentation was available. Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. +# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will +# be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual +# methods of a class will be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIV_VIRTUAL = NO + +# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal +# scope will be included in the documentation. +# The default value is: NO. + +EXTRACT_PACKAGE = YES + +# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be +# included in the documentation. +# The default value is: NO. + +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO, +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. 
If set to YES, local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO, only methods in the interface are +# included. +# The default value is: NO. + +EXTRACT_LOCAL_METHODS = YES + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. By default anonymous namespaces +# are hidden. +# The default value is: NO. + +EXTRACT_ANON_NSPACES = YES + +# If this flag is set to YES, the name of an unnamed parameter in a declaration +# will be determined by the corresponding definition. By default unnamed +# parameters remain unnamed in the output. +# The default value is: YES. + +RESOLVE_UNNAMED_PARAMS = YES + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. If set +# to NO, these classes will be included in the various overviews. This option +# will also hide undocumented C++ concepts if enabled. This option has no effect +# if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# declarations. If set to NO, these declarations will be included in the +# documentation. +# The default value is: NO.
+ +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO, these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. + +INTERNAL_DOCS = NO + +# With the correct setting of option CASE_SENSE_NAMES doxygen will better be +# able to match the capabilities of the underlying filesystem. In case the +# filesystem is case sensitive (i.e. it supports files in the same directory +# whose names only differ in casing), the option must be set to YES to properly +# deal with such files in case they appear in the input. For filesystems that +# are not case sensitive the option should be set to NO to properly deal with +# output files written for symbols that only differ in casing, such as for two +# classes, one named CLASS and the other named Class, and to also support +# references to files without having to specify the exact matching casing. On +# Windows (including Cygwin) and MacOS, users should typically set this option +# to NO, whereas on Linux or other Unix flavors it should typically be set to +# YES. +# Possible values are: SYSTEM, NO and YES. +# The default value is: SYSTEM. + +CASE_SENSE_NAMES = SYSTEM + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. If set to YES, the +# scope will be hidden. +# The default value is: NO. + +HIDE_SCOPE_NAMES = NO + +# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will +# append additional text to a page's title, such as Class Reference. 
If set to +# YES the compound reference will be hidden. +# The default value is: NO. + +HIDE_COMPOUND_REFERENCE= NO + +# If the SHOW_HEADERFILE tag is set to YES then the documentation for a class +# will show which file needs to be included to use the class. +# The default value is: YES. + +SHOW_HEADERFILE = YES + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. + +SHOW_INCLUDE_FILES = YES + +# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each +# grouped member an include statement to the documentation, telling the reader +# which file to include in order to use the member. +# The default value is: NO. + +SHOW_GROUPED_MEMB_INC = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. +# The default value is: YES. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. Note that +# this will also influence the order of the classes in the class list. +# The default value is: NO. 
+ +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. +# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo +# list. This list is created by putting \todo commands in the documentation. 
+# The default value is: YES. + +GENERATE_TODOLIST = NO + +# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test +# list. This list is created by putting \test commands in the documentation. +# The default value is: YES. + +GENERATE_TESTLIST = NO + +# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug +# list. This list is created by putting \bug commands in the documentation. +# The default value is: YES. + +GENERATE_BUGLIST = NO + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) +# the deprecated list. This list is created by putting \deprecated commands in +# the documentation. +# The default value is: YES. + +GENERATE_DEPRECATEDLIST= NO + +# The ENABLED_SECTIONS tag can be used to enable conditional documentation +# sections, marked by \if ... \endif and \cond +# ... \endcond blocks. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the +# initial value of a variable or macro / define can have for it to appear in the +# documentation. If the initializer consists of more lines than specified here +# it will be hidden. Use a value of 0 to hide initializers completely. The +# appearance of the value of individual variables and macros / defines can be +# controlled using \showinitializer or \hideinitializer command in the +# documentation regardless of this setting. +# Minimum value: 0, maximum value: 10000, default value: 30. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at +# the bottom of the documentation of classes and structs. If set to YES, the +# list will mention the files that were used to generate the documentation. +# The default value is: YES. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This +# will remove the Files entry from the Quick Index and from the Folder Tree View +# (if specified). 
+# The default value is: YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces +# page. This will remove the Namespaces entry from the Quick Index and from the +# Folder Tree View (if specified). +# The default value is: YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command command input-file, where command is the value of the +# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided +# by doxygen. Whatever the program writes to standard output is used as the file +# version. For an example see the documentation. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. You can +# optionally specify a file name after the option, if omitted DoxygenLayout.xml +# will be used as the name of the layout file. See also section "Changing the +# layout of pages" for information. +# +# Note that if you run doxygen from a directory containing a file called +# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE +# tag is left empty. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. +# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. 
To use this feature you need bibtex and perl available in the +# search path. See also \cite for info how to create references. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = YES + +# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. + +WARN_IF_UNDOCUMENTED = YES + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as documenting some parameters in +# a documented function twice, or documenting parameters that don't exist or +# using markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = YES + +# If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete +# function parameter documentation. If set to NO, doxygen will accept that some +# parameters have no documentation without warning. +# The default value is: YES. + +WARN_IF_INCOMPLETE_DOC = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. 
If set to NO, doxygen will only warn about wrong parameter +# documentation, but not about the absence of documentation. If EXTRACT_ALL is +# set to YES then this flag will automatically be disabled. See also +# WARN_IF_INCOMPLETE_DOC +# The default value is: NO. + +WARN_NO_PARAMDOC = YES + +# If WARN_IF_UNDOC_ENUM_VAL option is set to YES, doxygen will warn about +# undocumented enumeration values. If set to NO, doxygen will accept +# undocumented enumeration values. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: NO. + +WARN_IF_UNDOC_ENUM_VAL = NO + +# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when +# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS +# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but +# at the end of the doxygen process doxygen will return with a non-zero status. +# Possible values are: NO, YES and FAIL_ON_WARNINGS. +# The default value is: NO. + +WARN_AS_ERROR = NO + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# See also: WARN_LINE_FORMAT +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line: $text" + +# In the $text part of the WARN_FORMAT command it is possible that a reference +# to a more specific place is given. To make it easier to jump to this place +# (outside of doxygen) the user can define a custom "cut" / "paste" string. +# Example: +# WARN_LINE_FORMAT = "'vi $file +$line'" +# See also: WARN_FORMAT +# The default value is: at line $line of file $file. 
+ +WARN_LINE_FORMAT = "at line $line of file $file" + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). In case the file specified cannot be opened for writing the +# warning and error messages are written to standard error. When - is specified +# as the file name, the warning and error messages are written to standard output +# (stdout). + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. Separate the files or directories with +# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING. +# Note: If this tag is empty the current directory is searched. + +INPUT = cpp/roaring.hh cpp/roaring64map.hh + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: +# https://www.gnu.org/software/libiconv/) for the list of possible encodings. +# See also: INPUT_FILE_ENCODING +# The default value is: UTF-8. + +INPUT_ENCODING = UTF-8 + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. The INPUT_FILE_ENCODING tag can be used to specify +# character encoding on a per file pattern basis. Doxygen will compare the file +# name with each pattern and apply the encoding (instead of the default +# INPUT_ENCODING) if there is a match. The character encodings are a list of the +# form: pattern=encoding (like *.php=ISO-8859-1).
See cfg_input_encoding +# "INPUT_ENCODING" for further information on supported encodings. + +INPUT_FILE_ENCODING = + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# read by doxygen. +# +# Note the list of default checked file patterns might differ from the list of +# default file extension mappings. +# +# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, +# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, +# *.hh, *.hxx, *.hpp, *.h++, *.l, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, +# *.inc, *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C +# comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, +# *.vhdl, *.ucf, *.qsf and *.ice. + +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.c++ \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.idl \ + *.ddl \ + *.odl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.l \ + *.cs \ + *.d \ + *.php \ + *.php4 \ + *.php5 \ + *.phtml \ + *.inc \ + *.m \ + *.markdown \ + *.md \ + *.mm \ + *.dox \ + *.py \ + *.pyw \ + *.f90 \ + *.f95 \ + *.f03 \ + *.f08 \ + *.f18 \ + *.f \ + *.for \ + *.vhd \ + *.vhdl \ + *.ucf \ + *.qsf \ + *.ice + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. +# The default value is: NO. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. 
+# +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = benchmarks, tests, Testing, tools, build, docs + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. +# The default value is: NO. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories for example use the pattern */test/* + +EXCLUDE_PATTERNS = "*/test/*" + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# ANamespace::AClass, ANamespace::*Test +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories use the pattern */test/* + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or directories +# that contain example code fragments that are included (see the \include +# command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank all +# files are included. + +EXAMPLE_PATTERNS = * + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude commands +# irrespective of the value of the RECURSIVE tag. +# The default value is: NO. 
+ +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or directories +# that contain images that are to be included in the documentation (see the +# \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command: +# +# <filter> <input-file> +# +# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the +# name of an input file. Doxygen will then use the output that the filter +# program writes to standard output. If FILTER_PATTERNS is specified, this tag +# will be ignored. +# +# Note that the filter must not add or remove lines; it is applied before the +# code is scanned, but not when the output code is generated. If lines are added +# or removed, the anchors will not be placed correctly. +# +# Note that doxygen will use the data processed and written to standard output +# for further processing, therefore nothing else, like debug statements or used +# commands (so in case of a Windows batch file always use @echo OFF), should be +# written to standard output. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: pattern=filter +# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how +# filters are used. If the FILTER_PATTERNS tag is empty or if none of the +# patterns match the file name, INPUT_FILTER is applied.
+# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will also be used to filter the input files that are used for +# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). +# The default value is: NO. + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and +# it is also possible to disable source filtering for a specific pattern using +# *.ext= (so without naming a filter). +# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. + +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. + +USE_MDFILE_AS_MAINPAGE = README.md + +# The Fortran standard specifies that for fixed formatted Fortran code all +# characters from position 72 are to be considered as comment. A common +# extension is to allow longer lines before the automatic comment starts. The +# setting FORTRAN_COMMENT_AFTER will also make it possible that longer lines can +# be processed before the automatic comment starts. +# Minimum value: 7, maximum value: 10000, default value: 72. 
+ +FORTRAN_COMMENT_AFTER = 72 + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will be +# generated. Documented entities will be cross-referenced with these sources. +# +# Note: To get rid of all source code in the generated output, make sure that +# also VERBATIM_HEADERS is set to NO. +# The default value is: NO. + +SOURCE_BROWSER = YES + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. +# The default value is: YES. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# entity all documented functions referencing it will be listed. +# The default value is: NO. + +REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed. +# The default value is: NO. + +REFERENCES_RELATION = YES + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will +# link to the documentation. +# The default value is: YES. 
+ +REFERENCES_LINK_SOURCE = NO + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen's built-in +# source browser. The htags tool is part of GNU's global source tagging system +# (see https://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. +# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified. Set to NO to disable this. +# See also: Section \class. +# The default value is: YES.
+ +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES. + +ALPHABETICAL_INDEX = YES + +# The IGNORE_PREFIX tag can be used to specify a prefix (or a list of prefixes) +# that should be ignored while generating the index headers. The IGNORE_PREFIX +# tag works for classes, function and member names. The entity will be placed in +# the alphabetical list under the first letter of the entity name that remains +# after removing the prefix. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output +# The default value is: YES. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. 
If the tag is left blank doxygen will generate a +# standard header. +# +# To get valid HTML the header file must include any scripts and style sheets +# that doxygen needs, which depends on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses. +# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete.
+# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined +# cascading style sheets that are included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. +# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefore more robust against future updates. +# Doxygen will copy the style sheet files to the output directory. +# Note: The order of the extra style sheet files is of importance (e.g. the last +# style sheet in the list overrules the setting of the previous ones in the +# list). +# Note: Since the styling of scrollbars can currently not be overruled in +# Webkit/Chromium, the styling will be left out of the default doxygen.css if +# one or more extra stylesheets have been specified. So if scrollbar +# customization is desired it has to be added explicitly. For an example see the +# documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_STYLESHEET = docs/theme/doxygen-awesome.css \ + docs/theme/doxygen-awesome-sidebar-only.css + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_FILES = docs/theme/doxygen-awesome-darkmode-toggle.js + +# The HTML_COLORSTYLE tag can be used to specify if the generated HTML output +# should be rendered with a dark or light theme. 
+# Possible values are: LIGHT always generate light mode output, DARK always +# generate dark mode output, AUTO_LIGHT automatically set the mode according to +# the user preference, use light mode if no preference is set (the default), +# AUTO_DARK automatically set the mode according to the user preference, use +# dark mode if no preference is set and TOGGLE allows the user to switch between +# light and dark mode via a button. +# The default value is: AUTO_LIGHT. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE = LIGHT + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the style sheet and background images according to +# this color. Hue is specified as an angle on a color-wheel, see +# https://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_HUE = 209 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use gray-scales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_SAT = 255 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. +# Minimum value: 40, maximum value: 240, default value: 80.
+# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 113 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to YES can help to show when doxygen was last run and thus if the +# documentation is up to date. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = NO + +# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML +# documentation will contain a main index with vertical navigation menus that +# are dynamically created via JavaScript. If disabled, the navigation index will +# consist of multiple levels of tabs that are statically embedded in every HTML +# page. Disable this option to support browsers that do not have JavaScript, +# like the Qt help browser. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_MENUS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount). So setting the number of +# entries 1 will produce a full collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a full expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: +# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To +# create a documentation set, doxygen will generate a Makefile in the HTML +# output directory. Running make will produce the docset in that directory and +# running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy +# genXcode/_index.html for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag determines the URL of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDURL = + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES. 
+ +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# on Windows. In the beginning of 2021 Microsoft took the original page, with +# a.o. the download links, offline (the HTML help workshop was already many years +# in maintenance mode). You can download the HTML help workshop from the web +# archives at Installation executable (see: +# http://web.archive.org/web/20160201063255/http://download.microsoft.com/downlo +# ad/0/A/9/0A939EF6-E31C-430F-A3DF-DFAE7960D564/htmlhelp.exe). +# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file.
You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler (hhc.exe). If non-empty, +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +HHC_LOCATION = + +# The GENERATE_CHI flag controls if a separate .chi index file is generated +# (YES) or that it should be included in the main .chm file (NO). +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) +# and project file content. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_INDEX_ENCODING = + +# The BINARY_TOC flag controls whether a binary table of contents is generated +# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it +# enables the Previous and Next buttons. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. For more information please see Qt Help Project / Namespace +# (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders). +# The default value is: doc. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. For more information please see Qt Help Project / Custom +# Filters (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see Qt Help Project / Custom +# Filters (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). 
+# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location (absolute path +# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to +# run qhelpgenerator on the generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated, together with the HTML files, they form an Eclipse help plugin. To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files needs +# to be copied into the plugins directory of eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can +# further fine tune the look of the index (see "Fine-tuning the output"). As an +# example, the default style sheet generated by doxygen has an example that +# shows how to put an image at the root of the tree instead of the PROJECT_NAME. +# Since the tree basically has the same information as the tab index, you could +# consider setting DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_TREEVIEW = YES + +# When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the +# FULL_SIDEBAR option determines if the side bar is limited to only the treeview +# area (value NO) or if it should extend to the full height of the window (value +# YES). Setting this to YES gives a layout similar to +# https://docs.readthedocs.io with more room for contents, but less room for the +# project logo, title, and description. If either GENERATE_TREEVIEW or +# DISABLE_INDEX is set to NO, this option has no effect. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FULL_SIDEBAR = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. 
+# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. +# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. +# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +EXT_LINKS_IN_WINDOW = NO + +# If the OBFUSCATE_EMAILS tag is set to YES, doxygen will obfuscate email +# addresses. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +OBFUSCATE_EMAILS = YES + +# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg +# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see +# https://inkscape.org) to generate formulas as SVG images instead of PNGs for +# the HTML output. These images will generally look nicer at scaled resolutions. +# Possible values are: png (the default) and svg (looks nicer but requires the +# pdf2svg or inkscape tool). +# The default value is: png. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FORMULA_FORMAT = png + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_FONTSIZE = 10 + +# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands +# to create new LaTeX commands to be used in formulas as building blocks. See +# the section "Including formulas" for details. + +FORMULA_MACROFILE = + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see +# https://www.mathjax.org) which uses client side JavaScript for the rendering +# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX +# installed or if you want formulas to look prettier in the HTML output. When +# enabled you may also need to install MathJax separately and configure the path +# to it using the MATHJAX_RELPATH option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +USE_MATHJAX = NO + +# With MATHJAX_VERSION it is possible to specify the MathJax version to be used. +# Note that the different versions of MathJax have different requirements with +# regards to the different settings, so it is possible that also other MathJax +# settings have to be changed when switching between the different MathJax +# versions. +# Possible values are: MathJax_2 and MathJax_3. +# The default value is: MathJax_2. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_VERSION = MathJax_2 + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. For more details about the output format see MathJax +# version 2 (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) and MathJax version 3 +# (see: +# http://docs.mathjax.org/en/latest/web/components/output.html). +# Possible values are: HTML-CSS (which is slower, but has the best +# compatibility. This is the name for Mathjax version 2, for MathJax version 3 +# this will be translated into chtml), NativeMML (i.e. MathML. Only supported +# for MathJax 2.
For MathJax version 3 chtml will be used instead.), chtml (This +# is the name for Mathjax version 3, for MathJax version 2 this will be +# translated into HTML-CSS) and SVG. +# The default value is: HTML-CSS. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the HTML +# output directory using the MATHJAX_RELPATH option. The destination directory +# should contain the MathJax.js script. For instance, if the mathjax directory +# is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax +# Content Delivery Network so you can quickly see the result without installing +# MathJax. However, it is strongly recommended to install a local copy of +# MathJax from https://www.mathjax.org before deployment. The default value is: +# - in case of MathJax version 2: https://cdn.jsdelivr.net/npm/mathjax@2 +# - in case of MathJax version 3: https://cdn.jsdelivr.net/npm/mathjax@3 +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_RELPATH = + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax +# extension names that should be enabled during MathJax rendering. For example +# for MathJax version 2 (see +# https://docs.mathjax.org/en/v2.7-latest/tex.html#tex-and-latex-extensions): +# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# For example for MathJax version 3 (see +# http://docs.mathjax.org/en/latest/input/tex/extensions/index.html): +# MATHJAX_EXTENSIONS = ams +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_EXTENSIONS = + +# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces +# of code that will be used on startup of the MathJax code. See the MathJax site +# (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an +# example see the documentation. 
+# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_CODEFILE = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for +# the HTML output. The underlying search engine uses javascript and DHTML and +# should work on any modern browser. Note that when using HTML help +# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) +# there is already a search function so this one should typically be disabled. +# For large projects the javascript based search engine can be slow, then +# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to +# search using the keyboard; to jump to the search box use + S +# (what the is depends on the OS and browser, but it is typically +# , /