From 3194adca2a8fe3b7d1e3f3c72b6cbf9d08e987c4 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Thu, 25 Aug 2022 17:16:48 -0400
Subject: [PATCH 001/162] Verifies fix on issue 382 (#383)

* More portable code

* Verifies fix on issue 382

* Corrected variable name.
---
 .github/workflows/ubuntu-debug-sani-ci.yml | 25 ++++++++++++++++++++++
 src/array_util.c                           |  5 +++--
 tests/cpp_unit.cpp                         | 16 ++++++++++++++
 3 files changed, 44 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/ubuntu-debug-sani-ci.yml

diff --git a/.github/workflows/ubuntu-debug-sani-ci.yml b/.github/workflows/ubuntu-debug-sani-ci.yml
new file mode 100644
index 000000000..52848d873
--- /dev/null
+++ b/.github/workflows/ubuntu-debug-sani-ci.yml
@@ -0,0 +1,25 @@
+name: Ubuntu-Debug-Sanitized-CI
+
+'on':
+  - push
+  - pull_request
+
+
+jobs:
+  ci:
+    name: ubuntu-gcc
+    runs-on: ubuntu-latest
+
+    env:
+      CC: gcc
+      CXX: g++
+
+    steps: 
+      - uses: actions/checkout@v2
+      - name: Build and Test
+        run: |
+          mkdir build
+          cd build
+          cmake  -DCMAKE_BUILD_TYPE=Debug -DROARING_SANITIZE=ON ..
+          cmake --build . 
+          ctest . --output-on-failure
diff --git a/src/array_util.c b/src/array_util.c
index d01deae2a..48349105a 100644
--- a/src/array_util.c
+++ b/src/array_util.c
@@ -1912,8 +1912,9 @@ static inline bool _avx2_memequals(const void *s1, const void *s2, size_t n) {
     }
 
     while (ptr1 < end8) {
-        uint64_t v1 = *((const uint64_t*)ptr1);
-        uint64_t v2 = *((const uint64_t*)ptr2);
+        uint64_t v1, v2;
+        memcpy(&v1,ptr1,sizeof(uint64_t));
+        memcpy(&v2,ptr2,sizeof(uint64_t));
         if (v1 != v2) {
             return false;
         }
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index 5c3de8fca..d0ba16e93 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -699,6 +699,22 @@ DEFINE_TEST(test_cpp_frozen) {
     const Roaring r2 = Roaring::frozenView(buf, num_bytes);
     assert_true(r1 == r2);
 
+    {
+        Roaring r;
+        r.addRange(0, 100000);
+        r.flip(90000, 91000);
+        r.runOptimize();
+
+        // allocate a buffer and serialize to it
+        size_t num_bytes1 = r.getFrozenSizeInBytes();
+        char *buf1 = (char *)roaring_aligned_malloc(32, num_bytes1);
+        r.writeFrozen(buf1);
+
+        // ensure the frozen bitmap is the same as the original
+        const Roaring rr = Roaring::frozenView(buf1, num_bytes1);
+        assert_true(r == rr);
+        roaring_aligned_free(buf1);
+    }
 #if ROARING_EXCEPTIONS
     // try viewing a misaligned/invalid buffer
     try {

From 063a6c398b5778ca3e5e782d252102e4f77020a3 Mon Sep 17 00:00:00 2001
From: Soerian Lieve <soerian@gmail.com>
Date: Fri, 26 Aug 2022 04:08:37 +0100
Subject: [PATCH 002/162] Add add/removeRange to Roaring and Roaring64Map
 (#381)

These were already present in the C API, but the C++ API had limited
support.

Co-authored-by: Daniel Lemire <lemire@gmail.com>
---
 cpp/roaring.hh            |  27 ++++-
 cpp/roaring64map.hh       |  98 +++++++++++++++++
 tests/cpp_random_unit.cpp |   9 +-
 tests/cpp_unit.cpp        | 216 ++++++++++++++++++++++++++++++++++++++
 tests/roaring_checked.hh  |  31 ++++--
 5 files changed, 368 insertions(+), 13 deletions(-)

diff --git a/cpp/roaring.hh b/cpp/roaring.hh
index 8ee9e7b30..3853ae7b6 100644
--- a/cpp/roaring.hh
+++ b/cpp/roaring.hh
@@ -127,10 +127,17 @@ public:
     }
 
     /**
-     * Add all values from x (included) to y (excluded)
+     * Add all values in range [min, max)
      */
-    void addRange(const uint64_t x, const uint64_t y)  {
-        return api::roaring_bitmap_add_range(&roaring, x, y);
+    void addRange(const uint64_t min, const uint64_t max)  {
+        return api::roaring_bitmap_add_range(&roaring, min, max);
+    }
+
+    /**
+     * Add all values in range [min, max]
+     */
+    void addRangeClosed(const uint32_t min, const uint32_t max)  {
+        return api::roaring_bitmap_add_range_closed(&roaring, min, max);
     }
 
     /**
@@ -154,6 +161,20 @@ public:
         return api::roaring_bitmap_remove_checked(&roaring, x);
     }
 
+    /**
+     * Remove all values in range [min, max)
+     */
+    void removeRange(uint64_t min, uint64_t max) {
+        return api::roaring_bitmap_remove_range(&roaring, min, max);
+    }
+
+    /**
+     * Remove all values in range [min, max]
+     */
+    void removeRangeClosed(uint32_t min, uint32_t max) {
+        return api::roaring_bitmap_remove_range_closed(&roaring, min, max);
+    }
+
     /**
      * Return the largest value (if not empty)
      */
diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index 6ec9ccdff..383271b05 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -118,6 +118,52 @@ public:
         return result;
     }
 
+    /**
+     * Add all values in range [min, max)
+     */
+    void addRange(uint64_t min, uint64_t max) {
+        if (min >= max) {
+            return;
+        }
+        addRangeClosed(min, max - 1);
+    }
+
+    /** Add all values in range [min, max]. */
+    // 32-bit bounds: forward to the 64-bit overload, which also propagates the
+    // copy-on-write flag and returns early (touching nothing) when min > max.
+    void addRangeClosed(uint32_t min, uint32_t max) {
+        addRangeClosed(uint64_t(min), uint64_t(max));
+    }
+    void addRangeClosed(uint64_t min, uint64_t max) {
+        if (min > max) {
+            return;
+        }
+        uint32_t start_high = highBytes(min);
+        uint32_t start_low = lowBytes(min);
+        uint32_t end_high = highBytes(max);
+        uint32_t end_low = lowBytes(max);
+        if (start_high == end_high) {
+            roarings[start_high].addRangeClosed(start_low, end_low);
+            roarings[start_high].setCopyOnWrite(copyOnWrite);
+            return;
+        }
+        // we put std::numeric_limits<>::max/min in parenthesis to avoid a clash
+        // with the Windows.h header under Windows
+        roarings[start_high].addRangeClosed(
+            start_low, (std::numeric_limits<uint32_t>::max)());
+        roarings[start_high].setCopyOnWrite(copyOnWrite);
+        start_high++;
+        for (; start_high < end_high; ++start_high) {
+            roarings[start_high].addRangeClosed(
+                (std::numeric_limits<uint32_t>::min)(),
+                (std::numeric_limits<uint32_t>::max)());
+            roarings[start_high].setCopyOnWrite(copyOnWrite);
+        }
+        roarings[end_high].addRangeClosed(
+            (std::numeric_limits<uint32_t>::min)(), end_low);
+        roarings[end_high].setCopyOnWrite(copyOnWrite);
+    }
+
     /**
      * Add value n_args from pointer vals
      */
@@ -158,6 +204,58 @@ public:
         return false;
     }
 
+    /**
+     * Remove all values in range [min, max)
+     */
+    void removeRange(uint64_t min, uint64_t max) {
+        if (min >= max) {
+            return;
+        }
+        return removeRangeClosed(min, max - 1);
+    }
+
+    /** Remove all values in range [min, max]. */
+    // 32-bit bounds: forward to the 64-bit overload, which looks the bucket up
+    // instead of using operator[], so no empty bitmap is inserted at key 0.
+    void removeRangeClosed(uint32_t min, uint32_t max) {
+        removeRangeClosed(uint64_t(min), uint64_t(max));
+    }
+    void removeRangeClosed(uint64_t min, uint64_t max) {
+        if (min > max) {
+            return;
+        }
+        uint32_t start_high = highBytes(min);
+        uint32_t start_low = lowBytes(min);
+        uint32_t end_high = highBytes(max);
+        uint32_t end_low = lowBytes(max);
+
+        if (roarings.empty() || end_high < roarings.cbegin()->first ||
+            start_high > (roarings.crbegin())->first) {
+            return;
+        }
+
+        auto start_iter = roarings.lower_bound(start_high);
+        auto end_iter = roarings.lower_bound(end_high);
+        if (start_iter->first == start_high) {
+            if (start_iter == end_iter) {
+                start_iter->second.removeRangeClosed(start_low, end_low);
+                return;
+            }
+            // we put std::numeric_limits<>::max/min in parenthesis
+            // to avoid a clash with the Windows.h header under Windows
+            start_iter->second.removeRangeClosed(
+                start_low, (std::numeric_limits<uint32_t>::max)());
+            start_iter++;
+        }
+
+        roarings.erase(start_iter, end_iter);
+
+        if (end_iter != roarings.cend() && end_iter->first == end_high) {
+            end_iter->second.removeRangeClosed(
+                (std::numeric_limits<uint32_t>::min)(), end_low);
+        }
+    }
+
     /**
      * Clear the bitmap
      */
diff --git a/tests/cpp_random_unit.cpp b/tests/cpp_random_unit.cpp
index 21749133e..37000ebf5 100644
--- a/tests/cpp_random_unit.cpp
+++ b/tests/cpp_random_unit.cpp
@@ -57,7 +57,7 @@ Roaring make_random_bitset() {
     Roaring r;
     int num_ops = rand() % 100;
     for (int i = 0; i < num_ops; ++i) {
-        switch (rand() % 4) {
+        switch (rand() % 5) {
           case 0:
             r.add(gravity);
             break;
@@ -68,11 +68,16 @@ Roaring make_random_bitset() {
             break; }
 
           case 2: {
+            uint32_t start = gravity + (rand() % 10) - 5;
+            r.removeRange(start, start + rand() % 5);
+            break; }
+
+          case 3: {
             uint32_t start = gravity + (rand() % 50) - 25;
             r.flip(start, rand() % 50);
             break; }
 
-          case 3: {  // tests remove(), select(), rank()
+          case 4: {  // tests remove(), select(), rank()
             uint32_t card = r.cardinality();
             if (card != 0) {
                 uint32_t rnk = rand() % card;
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index d0ba16e93..09953d9d2 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -3,12 +3,15 @@
  */
 
 #include <assert.h>
+#include <iostream>
 #include <roaring/misc/configreport.h>
 #include <roaring/roaring.h>  // access to pure C exported API for testing
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
+#include <vector>
+
 
 #include <iostream>
 #include <type_traits>
@@ -613,6 +616,215 @@ DEFINE_TEST(test_cpp_add_remove_checked_64) {
     assert_true(roaring.isEmpty());
 }
 
+DEFINE_TEST(test_cpp_add_range) {
+    std::vector<std::pair<uint64_t, uint64_t>> ranges = {
+      {1, 5},
+      {1, 1},
+      {2, 1},
+    };
+    for (const auto &range : ranges) {
+        uint64_t min = range.first;
+        uint64_t max = range.second;
+        Roaring r1;
+        r1.addRangeClosed(min, max);
+        Roaring r2;
+        for (uint64_t v = min; v <= max; ++v) {
+            r2.add(v);
+        }
+        assert_true(r1 == r2);
+    }
+}
+
+DEFINE_TEST(test_cpp_remove_range) {
+    {
+        // min < r1.minimum, max > r1.maximum
+        Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4);
+        r1.removeRangeClosed(0, 5);
+        assert_true(r1.isEmpty());
+    }
+    {
+        // min < r1.minimum, max < r1.maximum, max does not exactly match an
+        // element
+        Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4);
+        r1.removeRangeClosed(0, 3);
+        Roaring r2 = Roaring::bitmapOf(1, 4);
+        assert_true(r1 == r2);
+    }
+    {
+        // min < r1.minimum, max < r1.maximum, max exactly matches an element
+        Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4);
+        r1.removeRangeClosed(0, 2);
+        Roaring r2 = Roaring::bitmapOf(1, 4);
+        assert_true(r1 == r2);
+    }
+    {
+        // min > r1.minimum, max > r1.maximum, min does not exactly match an
+        // element
+        Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4);
+        r1.removeRangeClosed(3, 5);
+        Roaring r2 = Roaring::bitmapOf(2, 1, 2);
+        assert_true(r1 == r2);
+    }
+    {
+        // min > r1.minimum, max > r1.maximum, min exactly matches an element
+        Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4);
+        r1.removeRangeClosed(2, 5);
+        Roaring r2 = Roaring::bitmapOf(1, 1);
+        assert_true(r1 == r2);
+    }
+    {
+        // min > r1.minimum, max < r1.maximum, no elements between min and max
+        Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4);
+        r1.removeRangeClosed(3, 3);
+        Roaring r2 = Roaring::bitmapOf(3, 1, 2, 4);
+        assert_true(r1 == r2);
+    }
+    {
+        // max < r1.minimum
+        Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4);
+        r1.removeRangeClosed(0, 0);
+        Roaring r2 = Roaring::bitmapOf(3, 1, 2, 4);
+        assert_true(r1 == r2);
+    }
+    {
+        // min > r1.maximum
+        Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4);
+        r1.removeRangeClosed(5, 6);
+        Roaring r2 = Roaring::bitmapOf(3, 1, 2, 4);
+        assert_true(r1 == r2);
+    }
+    {
+        // min > max
+        Roaring r1 = Roaring::bitmapOf(3, 1, 2, 4);
+        r1.removeRangeClosed(2, 1);
+        Roaring r2 = Roaring::bitmapOf(3, 1, 2, 4);
+        assert_true(r1 == r2);
+    }
+}
+
+DEFINE_TEST(test_cpp_add_range_64) {
+    {
+        // 32-bit integers
+        Roaring64Map r1;
+        r1.addRangeClosed(uint32_t(1), uint32_t(5));
+        Roaring64Map r2;
+        for (uint32_t v = 1; v <= 5; ++v) {
+            r2.add(v);
+        }
+        assert_true(r1 == r2);
+    }
+    std::vector<std::pair<uint64_t, uint64_t>> ranges = {
+        {uint64_t(1) << 32, (uint64_t(1) << 32) + 10},
+        {(uint64_t(1) << 32) - 10, (uint64_t(1) << 32) + 10},
+        {(uint64_t(1) << 32) + 2, (uint64_t(1) << 32) - 2}};
+    for (const auto &range : ranges) {
+        uint64_t min = range.first;
+        uint64_t max = range.second;
+        Roaring64Map r1;
+        r1.addRangeClosed(min, max);
+        Roaring64Map r2;
+        for (uint64_t v = min; v <= max; ++v) {
+            r2.add(v);
+        }
+        assert_true(r1 == r2);
+    }
+}
+
+DEFINE_TEST(test_cpp_remove_range_64) {
+    {
+        // 32-bit integers
+        Roaring64Map r1 =
+            Roaring64Map::bitmapOf(3, uint64_t(1), uint64_t(2), uint64_t(4));
+        r1.removeRangeClosed(uint32_t(2), uint32_t(3));
+        Roaring64Map r2 = Roaring64Map::bitmapOf(2, uint64_t(1), uint64_t(4));
+        assert_true(r1 == r2);
+    }
+    {
+        // min < r1.minimum, max > r1.maximum
+        Roaring64Map r1 = Roaring64Map::bitmapOf(
+            3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32);
+        r1.removeRangeClosed(uint64_t(0), uint64_t(5) << 32);
+        assert_true(r1.isEmpty());
+    }
+    {
+        // min < r1.minimum, max < r1.maximum, max does not exactly match an
+        // element
+        Roaring64Map r1 = Roaring64Map::bitmapOf(
+            3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32);
+        r1.removeRangeClosed(uint64_t(0), uint64_t(3) << 32);
+        Roaring64Map r2 = Roaring64Map::bitmapOf(1, uint64_t(4) << 32);
+        assert_true(r1 == r2);
+    }
+    {
+        // min < r1.minimum, max < r1.maximum, max exactly matches the high bits
+        // of an element
+        Roaring64Map r1 =
+            Roaring64Map::bitmapOf(4, uint64_t(1) << 32, uint64_t(2) << 32,
+                                   (uint64_t(2) << 32) + 1, uint64_t(4) << 32);
+        r1.removeRangeClosed(uint64_t(0), uint64_t(2) << 32);
+        Roaring64Map r2 = Roaring64Map::bitmapOf(2, (uint64_t(2) << 32) + 1,
+                                                 uint64_t(4) << 32);
+        assert_true(r1 == r2);
+    }
+    {
+        // min > r1.minimum, max > r1.maximum, min does not exactly match an
+        // element
+        Roaring64Map r1 = Roaring64Map::bitmapOf(
+            3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32);
+        r1.removeRangeClosed(uint64_t(3) << 32, uint64_t(5) << 32);
+        Roaring64Map r2 =
+            Roaring64Map::bitmapOf(2, uint64_t(1) << 32, uint64_t(2) << 32);
+        assert_true(r1 == r2);
+    }
+    {
+        // min > r1.minimum, max > r1.maximum, min exactly matches the high bits
+        // of an element
+        Roaring64Map r1 =
+            Roaring64Map::bitmapOf(4, uint64_t(1) << 32, uint64_t(2) << 32,
+                                   (uint64_t(2) << 32) + 1, uint64_t(4) << 32);
+        r1.removeRangeClosed((uint64_t(2) << 32) + 1, uint64_t(5) << 32);
+        Roaring64Map r2 =
+            Roaring64Map::bitmapOf(2, uint64_t(1) << 32, uint64_t(2) << 32);
+        assert_true(r1 == r2);
+    }
+    {
+        // min > r1.minimum, max < r1.maximum, no elements between min and max
+        Roaring64Map r1 = Roaring64Map::bitmapOf(
+            3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32);
+        r1.removeRangeClosed(uint64_t(3) << 32, (uint64_t(3) << 32) + 1);
+        Roaring64Map r2 = Roaring64Map::bitmapOf(
+            3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32);
+        assert_true(r1 == r2);
+    }
+    {
+        // max < r1.minimum
+        Roaring64Map r1 = Roaring64Map::bitmapOf(
+            3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32);
+        r1.removeRangeClosed(uint64_t(1), uint64_t(2));
+        Roaring64Map r2 = Roaring64Map::bitmapOf(
+            3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32);
+        assert_true(r1 == r2);
+    }
+    {
+        // min > r1.maximum
+        Roaring64Map r1 = Roaring64Map::bitmapOf(
+            3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32);
+        r1.removeRangeClosed(uint64_t(5) << 32, uint64_t(6) << 32);
+        Roaring64Map r2 = Roaring64Map::bitmapOf(
+            3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32);
+        assert_true(r1 == r2);
+    }
+    {
+        // min > max
+        Roaring64Map r1 = Roaring64Map::bitmapOf(
+            3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32);
+        r1.removeRangeClosed(uint64_t(2) << 32, uint64_t(1) << 32);
+        Roaring64Map r2 = Roaring64Map::bitmapOf(
+            3, uint64_t(1) << 32, uint64_t(2) << 32, uint64_t(4) << 32);
+        assert_true(r1 == r2);
+    }
+}
+
 DEFINE_TEST(test_cpp_clear_64) {
     Roaring64Map roaring;
 
@@ -920,6 +1132,10 @@ int main() {
         cmocka_unit_test(test_example_cpp_64_false),
         cmocka_unit_test(test_cpp_add_remove_checked),
         cmocka_unit_test(test_cpp_add_remove_checked_64),
+        cmocka_unit_test(test_cpp_add_range),
+        cmocka_unit_test(test_cpp_remove_range),
+        cmocka_unit_test(test_cpp_add_range_64),
+        cmocka_unit_test(test_cpp_remove_range_64),
         cmocka_unit_test(test_run_compression_cpp_64_true),
         cmocka_unit_test(test_run_compression_cpp_64_false),
         cmocka_unit_test(test_run_compression_cpp_true),
diff --git a/tests/roaring_checked.hh b/tests/roaring_checked.hh
index 9c7da3099..b06320199 100644
--- a/tests/roaring_checked.hh
+++ b/tests/roaring_checked.hh
@@ -117,15 +117,17 @@ class Roaring {
         return ans;
     }
 
-    void addRange(const uint64_t x, const uint64_t y)  {
-        plain.addRange(x, y);
+    void addRange(const uint64_t x, const uint64_t y) {
         if (x != y) {  // repeat add_range_closed() cast and bounding logic
-            uint32_t min = static_cast<uint32_t>(x);
-            uint32_t max = static_cast<uint32_t>(y - 1);
-            if (min <= max) {
-                for (uint32_t val = max; val != min - 1; --val)
-                    check.insert(val);
-            }
+            addRangeClosed(x, y - 1);
+        }
+    }
+
+    void addRangeClosed(uint32_t min, uint32_t max) {
+        plain.addRangeClosed(min, max);
+        if (min <= max) {
+            for (uint32_t val = max; val != min - 1; --val)
+                check.insert(val);
         }
     }
 
@@ -148,6 +150,19 @@ class Roaring {
         return ans;
     }
 
+    void removeRange(const uint64_t x, const uint64_t y) {
+        if (x < y) {  // [x, y) is empty otherwise; also keeps y - 1 from wrapping
+            removeRangeClosed(x, y - 1);
+        }
+    }
+
+    void removeRangeClosed(uint32_t min, uint32_t max) {
+        plain.removeRangeClosed(min, max);
+        if (min <= max) {
+            check.erase(check.lower_bound(min), check.upper_bound(max));
+        }
+    }
+
     uint32_t maximum() const {
         uint32_t ans = plain.maximum();
         assert(check.empty() ? ans == 0 : ans == *check.rbegin());

From efcb83dcdf332f02cde058a48574f5b7b14f73fb Mon Sep 17 00:00:00 2001
From: Soerian Lieve <soerian@fb.com>
Date: Fri, 26 Aug 2022 18:15:10 +0100
Subject: [PATCH 003/162] Check against maxbytes to read the size of the map
 (#384)

Previously, readSafe did not check whether it could read the map size in
the allotted maxbytes. In addition, it did not subtract the map size from
maxbytes when reading.

This fixes the above and adds some deserialization tests using binary
files. The valid files were produced with the script in
tests/cpp_unit_util.cpp. The invalid files were created manually.
---
 cpp/roaring64map.hh                  |  10 ++--
 tests/cpp_unit.cpp                   |  74 ++++++++++++++++++++++++++-
 tests/cpp_unit_util.cpp              |  49 ++++++++++++++++++
 tests/testdata/64map32bitvals.bin    | Bin 0 -> 48 bytes
 tests/testdata/64mapempty.bin        | Bin 0 -> 8 bytes
 tests/testdata/64mapemptyinput.bin   |   0
 tests/testdata/64maphighvals.bin     | Bin 0 -> 470 bytes
 tests/testdata/64mapinvalidsize.bin  | Bin 0 -> 9 bytes
 tests/testdata/64mapkeytoosmall.bin  | Bin 0 -> 11 bytes
 tests/testdata/64mapsizetoosmall.bin | Bin 0 -> 7 bytes
 tests/testdata/64mapspreadvals.bin   | Bin 0 -> 408 bytes
 11 files changed, 125 insertions(+), 8 deletions(-)
 create mode 100644 tests/cpp_unit_util.cpp
 create mode 100644 tests/testdata/64map32bitvals.bin
 create mode 100644 tests/testdata/64mapempty.bin
 create mode 100644 tests/testdata/64mapemptyinput.bin
 create mode 100644 tests/testdata/64maphighvals.bin
 create mode 100644 tests/testdata/64mapinvalidsize.bin
 create mode 100644 tests/testdata/64mapkeytoosmall.bin
 create mode 100644 tests/testdata/64mapsizetoosmall.bin
 create mode 100644 tests/testdata/64mapspreadvals.bin

diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index 383271b05..65596f838 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -730,19 +730,17 @@ public:
      * space compared to the portable format (e.g., for very sparse bitmaps).
      */
     static Roaring64Map readSafe(const char *buf, size_t maxbytes) {
+        if (maxbytes < sizeof(uint64_t)) {
+            ROARING_TERMINATE("ran out of bytes");
+        }
         Roaring64Map result;
-        // get map size
         uint64_t map_size;
         std::memcpy(&map_size, buf, sizeof(uint64_t));
         buf += sizeof(uint64_t);
+        maxbytes -= sizeof(uint64_t);
         for (uint64_t lcv = 0; lcv < map_size; lcv++) {
-            // get map key
             if(maxbytes < sizeof(uint32_t)) {
-#if ROARING_EXCEPTIONS
-                throw std::runtime_error("ran out of bytes");
-#else
                 ROARING_TERMINATE("ran out of bytes");
-#endif
             }
             uint32_t key;
             std::memcpy(&key, buf, sizeof(uint32_t));
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index 09953d9d2..f7be3c922 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -13,9 +13,12 @@
 #include <vector>
 
 
+#include <fstream>
 #include <iostream>
 #include <type_traits>
+#include <vector>
 
+#include "config.h"
 #include "roaring.hh"
 using roaring::Roaring;  // the C++ wrapper class
 
@@ -1036,8 +1039,6 @@ DEFINE_TEST(test_cpp_flip) {
         Roaring r1 = Roaring::bitmapOf(3, 1, 3, 6);
         r1.flip(2, 5);
         Roaring r2 = Roaring::bitmapOf(4, 1, 2, 4, 6);
-        r1.printf();
-        r2.printf();
         assert_true(r1 == r2);
     }
     {
@@ -1120,6 +1121,65 @@ DEFINE_TEST(test_cpp_flip_64) {
     }
 }
 
+// Round-trips a serialized Roaring64Map; true on success, false on exception.
+bool test64Deserialize(const std::string& filename) {
+    std::ifstream in(TEST_DATA_DIR + filename, std::ios::binary);
+    std::vector<char> buf1(std::istreambuf_iterator<char>(in), {});
+    printf("Reading %lu bytes\n", static_cast<unsigned long>(buf1.size()));
+    Roaring64Map roaring;
+#if ROARING_EXCEPTIONS
+    try {
+        roaring = Roaring64Map::readSafe(buf1.data(), buf1.size());
+    } catch (...) {
+        return false;
+    }
+#else
+    roaring = Roaring64Map::readSafe(buf1.data(), buf1.size());
+#endif
+    std::vector<char> buf2(roaring.getSizeInBytes());
+    assert_true(buf1.size() == buf2.size());
+    assert_true(roaring.write(buf2.data()) == buf2.size());
+    for (size_t i = 0; i < buf1.size(); ++i) {
+        assert_true(buf1[i] == buf2[i]);
+    }
+    return true;
+}
+
+// The valid files were created with cpp_unit_util.cpp.
+DEFINE_TEST(test_cpp_deserialize_64_empty) {
+  assert_true(test64Deserialize("64mapempty.bin"));
+}
+
+DEFINE_TEST(test_cpp_deserialize_64_32bit_vals) {
+  assert_true(test64Deserialize("64map32bitvals.bin"));
+}
+
+DEFINE_TEST(test_cpp_deserialize_64_spread_vals) {
+  assert_true(test64Deserialize("64mapspreadvals.bin"));
+}
+
+DEFINE_TEST(test_cpp_deserialize_64_high_vals) {
+  assert_true(test64Deserialize("64maphighvals.bin"));
+}
+
+#if ROARING_EXCEPTIONS
+DEFINE_TEST(test_cpp_deserialize_64_empty_input) {
+  assert_false(test64Deserialize("64mapemptyinput.bin"));
+}
+
+DEFINE_TEST(test_cpp_deserialize_64_size_too_small) {
+  assert_false(test64Deserialize("64mapsizetoosmall.bin"));
+}
+
+DEFINE_TEST(test_cpp_deserialize_64_invalid_size) {
+  assert_false(test64Deserialize("64mapinvalidsize.bin"));
+}
+
+DEFINE_TEST(test_cpp_deserialize_64_key_too_small) {
+  assert_false(test64Deserialize("64mapkeytoosmall.bin"));
+}
+#endif
+
 int main() {
     roaring::misc::tellmeall();
     const struct CMUnitTest tests[] = {
@@ -1148,6 +1208,16 @@ int main() {
         cmocka_unit_test(test_cpp_frozen_64),
         cmocka_unit_test(test_cpp_flip),
         cmocka_unit_test(test_cpp_flip_64),
+        cmocka_unit_test(test_cpp_deserialize_64_empty),
+        cmocka_unit_test(test_cpp_deserialize_64_32bit_vals),
+        cmocka_unit_test(test_cpp_deserialize_64_spread_vals),
+        cmocka_unit_test(test_cpp_deserialize_64_high_vals),
+#if ROARING_EXCEPTIONS
+        cmocka_unit_test(test_cpp_deserialize_64_empty_input),
+        cmocka_unit_test(test_cpp_deserialize_64_size_too_small),
+        cmocka_unit_test(test_cpp_deserialize_64_invalid_size),
+        cmocka_unit_test(test_cpp_deserialize_64_key_too_small),
+#endif
         cmocka_unit_test(issue316),
         cmocka_unit_test(test_issue304),
         cmocka_unit_test(issue_336),
diff --git a/tests/cpp_unit_util.cpp b/tests/cpp_unit_util.cpp
new file mode 100644
index 000000000..7a0b0b553
--- /dev/null
+++ b/tests/cpp_unit_util.cpp
@@ -0,0 +1,49 @@
+#include <fstream>
+#include <vector>
+
+#include "roaring.hh"
+#include "roaring64map.hh"
+
+using namespace roaring;
+
+void writeToFile(const Roaring64Map& roaring, const std::string& filename) {
+    std::vector<char> buf(roaring.getSizeInBytes());
+    roaring.write(buf.data());
+    std::ofstream out(filename, std::ios::binary);
+    out.write(buf.data(), buf.size());
+}
+
+// Utility to create files with valid serialized Roaring64Maps.
+int main() {
+    {
+        Roaring64Map roaring;
+        writeToFile(roaring, "64mapempty.bin");
+    }
+    {
+        Roaring64Map roaring;
+        for (uint32_t v = 0; v < 10; ++v) {
+          roaring.add(v);
+        }
+        writeToFile(roaring, "64map32bitvals.bin");
+    }
+    {
+        Roaring64Map roaring;
+        for (uint64_t high = 0; high < 10; ++high) {
+          for (uint64_t low = 0; low < 10; ++low) {
+            roaring.add((high << 32) + low);
+          }
+        }
+        writeToFile(roaring, "64mapspreadvals.bin");
+    }
+    {
+        Roaring64Map roaring;
+        uint64_t max32 = (std::numeric_limits<uint32_t>::max)();
+        for (uint64_t high = max32 - 10; high <= max32; ++high) {
+          for (uint64_t low = max32 - 10; low <= max32; ++low) {
+            roaring.add((high << 32) + low);
+          }
+        }
+        writeToFile(roaring, "64maphighvals.bin");
+    }
+    return EXIT_SUCCESS;
+}
diff --git a/tests/testdata/64map32bitvals.bin b/tests/testdata/64map32bitvals.bin
new file mode 100644
index 0000000000000000000000000000000000000000..475b894417e44cff61d8810057fc1530cef05718
GIT binary patch
literal 48
ocmZQ%KmaQP1_nkjmy<yNOfxbtF)%Z*Ft9SPF|admFmN&e021r~vj6}9

literal 0
HcmV?d00001

diff --git a/tests/testdata/64mapempty.bin b/tests/testdata/64mapempty.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1b1cb4d44c57c2d7a5122870fa6ac3e62ff7e94e
GIT binary patch
literal 8
KcmZQzfB*mh2mk>9

literal 0
HcmV?d00001

diff --git a/tests/testdata/64mapemptyinput.bin b/tests/testdata/64mapemptyinput.bin
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/testdata/64maphighvals.bin b/tests/testdata/64maphighvals.bin
new file mode 100644
index 0000000000000000000000000000000000000000..54abac71f271227c74a16f26ca7b0b0eae55bd8a
GIT binary patch
literal 470
zcma*jyA8k~6hqNROPY*A&HgL#hupj<3)qci=~TY#PXTf=3qB#*?OmueI)lk#b9h`;
aSvFaASq@oFSuR;_Ssqz_pK8xL6y*yyRW=F$

literal 0
HcmV?d00001

diff --git a/tests/testdata/64mapinvalidsize.bin b/tests/testdata/64mapinvalidsize.bin
new file mode 100644
index 0000000000000000000000000000000000000000..48a2754f69fcd1d54f423ee5b8bae5e2f8b0b91b
GIT binary patch
literal 9
McmZQz00TxY000#L3;+NC

literal 0
HcmV?d00001

diff --git a/tests/testdata/64mapkeytoosmall.bin b/tests/testdata/64mapkeytoosmall.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3a768cf467e9a5e39ca199a3cd093a52a59490f1
GIT binary patch
literal 11
OcmZQz00Tw_MlJvV8vqRe

literal 0
HcmV?d00001

diff --git a/tests/testdata/64mapsizetoosmall.bin b/tests/testdata/64mapsizetoosmall.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cd2112d98f7d268da7add022b82f0eac791bb279
GIT binary patch
literal 7
McmZQz00IFn001!n8vp<R

literal 0
HcmV?d00001

diff --git a/tests/testdata/64mapspreadvals.bin b/tests/testdata/64mapspreadvals.bin
new file mode 100644
index 0000000000000000000000000000000000000000..83c72f6ba32721c6756bc78cba163441c92b16bd
GIT binary patch
literal 408
zcma*eyA8lF3<J^g-%6EHQfL3cVyH=bY6yZ)7yrEvl&ry>S*SERgUMnOxf@r{zi~~v
PF5QrBO1Grj(uH&%!J7vn

literal 0
HcmV?d00001


From 693997497367645268568c4aa6fdfb05039c8f48 Mon Sep 17 00:00:00 2001
From: Zachary Dremann <dremann@gmail.com>
Date: Fri, 26 Aug 2022 16:25:41 -0400
Subject: [PATCH 004/162] Introduce `roaring_bitmap_*_bulk` operations (#363)

* implement bitmap contains multi

* typo

* fix commit and repair implementations

* fix check result after call ra_advance, check the high 16 bits instead.

* resolve comment and add document

* add unit test for contains multi

* add benchmark for contains_multi

* fix unittest

* fix unittest

* fix unittest

* add const to array length

* fix unittest

* add static inline declaration

* remove declaration in .c

* update codes via comments

* Applying various fixes.

* Add roaring_bitmap_add_bulk

roaring_bitmap_add_bulk is a generalization of roaring_bitmap_add_many,
caching the container for the last inserted item, and avoiding looking
the container up if another item is inserted in the same container.

Use the new function in the implementation of roaring_bitmap_add_many
and roaring_bitmap_of

* Add a test to add in bulk

* Allow `roaring_bitmap_add_many` to be used with an unaligned ptr

* Use the correct type for the container pointer in the bulk context
struct

* TMP: trying something

* Fix RDTSC_FINAL for CLOCK_THREAD_CPUTIME_ID

* Add a benchmark for add_bulk

* clang-format

* Don't load the whole context

* Reorder tests

* Improvements based on assembly output

* Inline

* Go back to using pointers into context

* Add docs for optimization

* Check the removals in the unit test

* clang-format

* Be smarter about benchmark clocks

* Remove initialized bool

* Posix should always have CLOCK_REALTIME

* Posix is a lie

* Implement a bulk contains function

* Be more fair to add_many, don't count time building the array

* Remove roaring_bitmap_contains_multi, use roaring_bitmap_contains_bulk

* Actually run bulk add unit test

* Fix incorrect behavior of roaring_bitmap_contains_bulk

* Fix compilation as c++

* Add extra logging for error only on windows

* Check if tests are being built with NDEBUG

* Use cmocka's `assert_true`, which is always evaluated

* Add more documentation to the `roaring_bulk_context_t` type

Co-authored-by: arthur <arthurkiller21@gmail.com>
Co-authored-by: Daniel Lemire <lemire@gmail.com>
---
 benchmarks/CMakeLists.txt            |   1 +
 benchmarks/add_benchmark.c           |  24 ++++-
 benchmarks/benchmark.h               |  72 +++++---------
 benchmarks/containsmulti_benchmark.c | 121 ++++++++++++++++++++++++
 include/roaring/roaring.h            |  59 +++++++++++-
 include/roaring/roaring_array.h      |   4 +-
 src/roaring.c                        | 134 +++++++++++++++++++--------
 src/roaring_array.c                  |   5 +-
 tests/toplevel_unit.c                |  77 ++++++++++++++-
 9 files changed, 394 insertions(+), 103 deletions(-)
 create mode 100644 benchmarks/containsmulti_benchmark.c

diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index 0d5fabcaa..4a3eb0e8e 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -12,6 +12,7 @@ if(NOT WIN32)
     add_c_benchmark(intersect_range_benchmark)
     target_link_libraries(add_benchmark m)
     add_c_benchmark(frozen_benchmark)
+    add_c_benchmark(containsmulti_benchmark)
 endif()
 add_c_benchmark(bitset_container_benchmark)
 add_c_benchmark(array_container_benchmark)
diff --git a/benchmarks/add_benchmark.c b/benchmarks/add_benchmark.c
index cc143e3dd..574b285c8 100644
--- a/benchmarks/add_benchmark.c
+++ b/benchmarks/add_benchmark.c
@@ -96,15 +96,33 @@ void run_test(uint32_t spanlen, uint32_t intvlen, double density, order_t order)
     printf("          %6.1f\n", array_min(results, num_passes));
 
     printf("  roaring_bitmap_add_many():");
+    for (int p = 0; p < num_passes; p++) {
+        roaring_bitmap_t *r = roaring_bitmap_create();
+        uint32_t values[intvlen * count];
+        for (int64_t i = 0; i < count; i++) {
+            for (uint32_t j = 0; j < intvlen; j++) {
+                values[i * intvlen + j] = offsets[i] + j;
+            }
+        }
+        RDTSC_START(cycles_start);
+        for (int64_t i = 0; i < count; i++) {
+            roaring_bitmap_add_many(r, intvlen, values + (i * intvlen));
+        }
+        RDTSC_FINAL(cycles_final);
+        results[p] = (cycles_final - cycles_start) * 1.0 / count / intvlen;
+        roaring_bitmap_free(r);
+    }
+    printf("     %6.1f\n", array_min(results, num_passes));
+
+    printf("  roaring_bitmap_add_bulk():");
     for (int p = 0; p < num_passes; p++) {
         roaring_bitmap_t *r = roaring_bitmap_create();
         RDTSC_START(cycles_start);
-        uint32_t values[intvlen];
+        roaring_bulk_context_t context = {0};
         for (int64_t i = 0; i < count; i++) {
             for (uint32_t j = 0; j < intvlen; j++) {
-                values[j] = offsets[i] + j;
+                roaring_bitmap_add_bulk(r, &context, offsets[i] + j);
             }
-            roaring_bitmap_add_many(r, intvlen, values);
         }
         RDTSC_FINAL(cycles_final);
         results[p] = (cycles_final - cycles_start) * 1.0 / count / intvlen;
diff --git a/benchmarks/benchmark.h b/benchmarks/benchmark.h
index fee613fd9..e3a6ad166 100644
--- a/benchmarks/benchmark.h
+++ b/benchmarks/benchmark.h
@@ -37,69 +37,39 @@
         (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;                      \
     } while (0)
 
-#elif defined(__linux__) && defined(__GLIBC__)
-
-#include <time.h>
-#ifdef CLOCK_THREAD_CPUTIME_ID
-#define RDTSC_START(cycles) \
-  do { \
-    struct timespec ts; \
-    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); \
-    cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
-  } while (0)
-
-#define RDTSC_FINAL(cycles) \
-  do { \
-    struct timespec ts; \
-    clock_gettime(CLOCK_REALTIME, &ts); \
-    cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
-  } while (0)
-
-#elif defined(CLOCK_REALTIME)  // #ifdef CLOCK_THREAD_CPUTIME_ID
-#define RDTSC_START(cycles) \
-  do { \
-    struct timespec ts; \
-    clock_gettime(CLOCK_REALTIME, &ts); \
-    cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
-  } while (0)
-
-#define RDTSC_FINAL(cycles) \
-  do { \
-    struct timespec ts; \
-    clock_gettime(CLOCK_REALTIME, &ts); \
-    cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
-  } while (0)
-
-#else
-#define RDTSC_START(cycles) \
-  do { \
-    cycles = clock(); \
-  } while(0)
+#else  // defined(CROARING_IS_X64) && defined(ROARING_INLINE_ASM)
+
+#if defined(CLOCK_THREAD_CPUTIME_ID)
+#define RDTSC_CLOCK_ID CLOCK_THREAD_CPUTIME_ID
+#elif defined(CLOCK_MONOTONIC)
+#define RDTSC_CLOCK_ID CLOCK_MONOTONIC
+#elif defined(CLOCK_REALTIME)
+#define RDTSC_CLOCK_ID CLOCK_REALTIME
+#endif
 
-#define RDTSC_FINAL(cycles) \
-  do { \
-    cycles = clock(); \
-  } while(0)
+#if defined(RDTSC_CLOCK_ID)
+#define RDTSC_START(cycles)                                     \
+    do {                                                        \
+        struct timespec ts;                                     \
+        clock_gettime(RDTSC_CLOCK_ID, &ts);                     \
+        cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \
+    } while (0)
 
-#endif // #ifdef CLOCK_THREAD_CPUTIME_ID
+#define RDTSC_FINAL(cycles) RDTSC_START(cycles)
 
-#else
+#else  // defined(RDTSC_CLOCK_ID)
 
 /**
-* Other architectures do not support rdtsc ?
+* Fall back to the `clock` function
 */
-#include <time.h>
-
 #define RDTSC_START(cycles) \
     do {                    \
         cycles = clock();   \
     } while (0)
 
-#define RDTSC_FINAL(cycles) \
-    do {                    \
-        cycles = clock();   \
-    } while (0)
+#define RDTSC_FINAL(cycles) RDTSC_START(cycles)
 
+#endif
 #endif
 
 /*
diff --git a/benchmarks/containsmulti_benchmark.c b/benchmarks/containsmulti_benchmark.c
new file mode 100644
index 000000000..e92d82e5a
--- /dev/null
+++ b/benchmarks/containsmulti_benchmark.c
@@ -0,0 +1,121 @@
+#define _GNU_SOURCE
+#include <roaring/roaring.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "benchmark.h"
+#include "random.h"
+#include "numbersfromtextfiles.h"
+
+void contains_multi_via_contains(roaring_bitmap_t* bm, const uint32_t* values, bool* results, const size_t count) {
+    for (size_t i = 0; i < count; ++i) {
+        results[i] = roaring_bitmap_contains(bm, values[i]);
+    }
+}
+
+void contains_multi_bulk(roaring_bitmap_t* bm, const uint32_t* values, bool* results, const size_t count) {
+    roaring_bulk_context_t context = {0};
+    for (size_t i = 0; i < count; ++i) {
+        results[i] = roaring_bitmap_contains_bulk(bm, &context, values[i]);
+    }
+}
+
+int compare_uint32(const void* a, const void* b) {
+    uint32_t arg1 = *(const uint32_t*)a;
+    uint32_t arg2 = *(const uint32_t*)b;
+    if (arg1 < arg2) return -1;
+    if (arg1 > arg2) return 1;
+    return 0;
+}
+
+int main(int argc, char* argv[]) {
+    (void)&read_all_integer_files; // suppress unused warning
+
+    if (argc < 2) {
+        printf("Usage: %s <comma_separated_integers_file> ...\n", argv[0]);
+        printf("Example: %s ~/CRoaring/benchmarks/realdata/weather_sept_85/*\n", argv[0]);
+        return 1;
+    }
+
+    size_t fields = argc-1;
+    uint32_t* values[argc];
+    size_t count[argc];
+
+    roaring_bitmap_t* bm = roaring_bitmap_create();
+    for (int i = 1; i < argc; i++) {
+        size_t t_count = 0;
+        uint32_t* t_values = read_integer_file(argv[i], &t_count);
+        if (t_count == 0) {
+            printf("No integers found in %s\n", argv[i]);
+            return 1;
+        }
+        roaring_bitmap_add_many(bm, t_count, t_values);
+
+        shuffle_uint32(t_values, t_count);
+
+        values[i-1] = t_values;
+        count[i-1] = t_count;
+    }
+    //roaring_bitmap_run_optimize(bm);
+
+    printf("Data:\n");
+    printf("  cardinality: %"PRIu64"\n", roaring_bitmap_get_cardinality(bm));
+    printf("  buckets: %d\n", (int)bm->high_low_container.size);
+    printf("  range: %"PRIu32"-%"PRIu32"\n", roaring_bitmap_minimum(bm) >> 16, roaring_bitmap_maximum(bm) >> 16);
+
+    const int num_passes = 10;
+    printf("Cycles/element: %d\n", num_passes);
+    uint64_t cycles_start, cycles_final;
+
+    printf("                          roaring_bitmap_contains:");
+    for (int p = 0; p < num_passes; p++) {
+        bool result[count[p]];
+        RDTSC_START(cycles_start);
+        contains_multi_via_contains(bm, values[p], result, count[p]);
+        RDTSC_FINAL(cycles_final);
+        printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]);
+    }
+    printf("\n");
+
+    printf("                     roaring_bitmap_contains_bulk:");
+    for (int p = 0; p < num_passes; p++) {
+        bool result[count[p]];
+        RDTSC_START(cycles_start);
+        contains_multi_bulk(bm, values[p], result, count[p]);
+        RDTSC_FINAL(cycles_final);
+        printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]);
+    }
+    printf("\n");
+
+    // sort input array
+    for (size_t i = 0; i < fields; ++i) {
+        qsort(values[i], count[i], sizeof(uint32_t), compare_uint32);
+    }
+
+    printf("        roaring_bitmap_contains with sorted input:");
+    for (int p = 0; p < num_passes; p++) {
+        bool result[count[p]];
+        RDTSC_START(cycles_start);
+        contains_multi_via_contains(bm, values[p], result, count[p]);
+        RDTSC_FINAL(cycles_final);
+        printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]);
+    }
+    printf("\n");
+
+    printf("   roaring_bitmap_contains_bulk with sorted input:");
+    for (int p = 0; p < num_passes; p++) {
+        bool result[count[p]];
+        RDTSC_START(cycles_start);
+        contains_multi_bulk(bm, values[p], result, count[p]);
+        RDTSC_FINAL(cycles_final);
+        printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]);
+    }
+    printf("\n");
+
+    roaring_bitmap_free(bm);
+    for (size_t i = 0; i < fields; ++i) {
+        free(values[i]);
+    }
+    return 0;
+}
diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h
index e82d05b1b..415152445 100644
--- a/include/roaring/roaring.h
+++ b/include/roaring/roaring.h
@@ -257,9 +257,48 @@ void roaring_bitmap_andnot_inplace(roaring_bitmap_t *r1,
  */
 void roaring_bitmap_free(const roaring_bitmap_t *r);
 
+/**
+ * A bit of context usable with `roaring_bitmap_*_bulk()` functions
+ *
+ * Should be initialized with `{0}` (or `memset()` to all zeros).
+ * Callers should treat it as an opaque type.
+ *
+ * A context may only be used with a single bitmap
+ * (unless re-initialized to zero), and any modification to a bitmap
+ * (other than modifications performed with `_bulk()` functions with the context
+ * passed) will invalidate any contexts associated with that bitmap.
+ */
+typedef struct roaring_bulk_context_s {
+    ROARING_CONTAINER_T *container;
+    int idx;
+    uint16_t key;
+    uint8_t typecode;
+} roaring_bulk_context_t;
+
+/**
+ * Add an item, using context from a previous insert for speed optimization.
+ *
+ * `context` will be used to store information between calls to make bulk
+ * operations faster. `*context` should be zero-initialized before the first
+ * call to this function.
+ *
+ * Modifying the bitmap in any way (other than `-bulk` suffixed functions)
+ * will invalidate the stored context, calling this function with a non-zero
+ * context after doing any modification invokes undefined behavior.
+ *
+ * In order to exploit this optimization, the caller should call this function
+ * with values with the same "key" (high 16 bits of the value) consecutively.
+ */
+void roaring_bitmap_add_bulk(roaring_bitmap_t *r,
+                             roaring_bulk_context_t *context, uint32_t val);
+
 /**
  * Add value n_args from pointer vals, faster than repeatedly calling
  * `roaring_bitmap_add()`
+ *
+ * In order to exploit this optimization, the caller should attempt to keep
+ * values with the same "key" (high 16 bits of the value) as consecutive
+ * elements in `vals`
  */
 void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
                              const uint32_t *vals);
@@ -335,6 +374,25 @@ bool roaring_bitmap_contains_range(const roaring_bitmap_t *r,
                                    uint64_t range_start,
                                    uint64_t range_end);
 
+/**
+ * Check if an items is present, using context from a previous insert for speed
+ * optimization.
+ *
+ * `context` will be used to store information between calls to make bulk
+ * operations faster. `*context` should be zero-initialized before the first
+ * call to this function.
+ *
+ * Modifying the bitmap in any way (other than `-bulk` suffixed functions)
+ * will invalidate the stored context, calling this function with a non-zero
+ * context after doing any modification invokes undefined behavior.
+ *
+ * In order to exploit this optimization, the caller should call this function
+ * with values with the same "key" (high 16 bits of the value) consecutively.
+ */
+bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r,
+                                  roaring_bulk_context_t *context,
+                                  uint32_t val);
+
 /**
  * Get the cardinality of the bitmap (number of elements).
  */
@@ -814,4 +872,3 @@ uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it,
         using namespace ::roaring::api;
     #endif
 #endif
-
diff --git a/include/roaring/roaring_array.h b/include/roaring/roaring_array.h
index fd201662b..24ce7cad2 100644
--- a/include/roaring/roaring_array.h
+++ b/include/roaring/roaring_array.h
@@ -93,7 +93,9 @@ inline container_t *ra_get_container_at_index(
 /**
  * Retrieves the key at index i
  */
-uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i);
+inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) {
+    return ra->keys[i];
+}
 
 /**
  * Add a new key-value pair at index i
diff --git a/src/roaring.c b/src/roaring.c
index 303f727c1..cc717bb29 100644
--- a/src/roaring.c
+++ b/src/roaring.c
@@ -87,46 +87,91 @@ bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap) {
     return ra_init_with_capacity(&r->high_low_container, cap);
 }
 
+static inline void add_bulk_impl(roaring_bitmap_t *r,
+                                 roaring_bulk_context_t *context,
+                                 uint32_t val) {
+    uint16_t key = val >> 16;
+    if (context->container == NULL || context->key != key) {
+        uint8_t typecode;
+        int idx;
+        context->container = containerptr_roaring_bitmap_add(
+            r, val, &typecode, &idx);
+        context->typecode = typecode;
+        context->idx = idx;
+        context->key = key;
+    } else {
+        // no need to seek the container, it is at hand
+        // because we already have the container at hand, we can do the
+        // insertion directly, bypassing the roaring_bitmap_add call
+        uint8_t new_typecode;
+        container_t *container2 = container_add(
+            context->container, val & 0xFFFF, context->typecode, &new_typecode);
+        if (container2 != context->container) {
+            // rare instance when we need to change the container type
+            container_free(context->container, context->typecode);
+            ra_set_container_at_index(&r->high_low_container, context->idx,
+                                      container2, new_typecode);
+            context->typecode = new_typecode;
+            context->container = container2;
+        }
+    }
+}
 
 void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
                              const uint32_t *vals) {
-    container_t *container = NULL;  // hold value of last container touched
-    uint8_t typecode = 0;    // typecode of last container touched
-    uint32_t prev = 0;       // previous valued inserted
-    size_t i = 0;            // index of value
-    int containerindex = 0;
-    if (n_args == 0) return;
     uint32_t val;
-    memcpy(&val, vals + i, sizeof(val));
-    container =
-        containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex);
-    prev = val;
-    i++;
-    for (; i < n_args; i++) {
-        memcpy(&val, vals + i, sizeof(val));
-        if (((prev ^ val) >> 16) ==
-            0) {  // no need to seek the container, it is at hand
-            // because we already have the container at hand, we can do the
-            // insertion
-            // automatically, bypassing the roaring_bitmap_add call
-            uint8_t newtypecode = typecode;
-            container_t *container2 =
-                container_add(container, val & 0xFFFF, typecode, &newtypecode);
-            if (container2 != container) {  // rare instance when we need to
-                                            // change the container type
-                container_free(container, typecode);
-                ra_set_container_at_index(&r->high_low_container,
-                                          containerindex, container2,
-                                          newtypecode);
-                typecode = newtypecode;
-                container = container2;
-            }
-        } else {
-            container = containerptr_roaring_bitmap_add(r, val, &typecode,
-                                                        &containerindex);
+    const uint32_t *start = vals;
+    const uint32_t *end = vals + n_args;
+    const uint32_t *current_val = start;
+
+    if (n_args == 0) {
+        return;
+    }
+
+    uint8_t typecode;
+    int idx;
+    container_t *container;
+    val = *current_val;
+    container = containerptr_roaring_bitmap_add(r, val, &typecode, &idx);
+    roaring_bulk_context_t context = {container, idx, (uint16_t)(val >> 16), typecode};
+
+    for (; current_val != end; current_val++) {
+        memcpy(&val, current_val, sizeof(val));
+        add_bulk_impl(r, &context, val);
+    }
+}
+
+void roaring_bitmap_add_bulk(roaring_bitmap_t *r,
+                             roaring_bulk_context_t *context, uint32_t val) {
+    add_bulk_impl(r, context, val);
+}
+
+bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r,
+                                  roaring_bulk_context_t *context,
+                                  uint32_t val)
+{
+    uint16_t key = val >> 16;
+    if (context->container == NULL || context->key != key) {
+        int32_t start_idx = -1;
+        if (context->container != NULL && context->key < key) {
+            start_idx = context->idx;
+        }
+        int idx = ra_advance_until(&r->high_low_container, key, start_idx);
+        if (idx == ra_get_size(&r->high_low_container)) {
+            return false;
+        }
+        uint8_t typecode;
+        context->container = ra_get_container_at_index(&r->high_low_container, idx, &typecode);
+        context->typecode = typecode;
+        context->idx = idx;
+        context->key = ra_get_key_at_index(&r->high_low_container, idx);
+        // ra_advance_until finds the next key >= the target, we found a later container.
+        if (context->key != key) {
+            return false;
         }
-        prev = val;
     }
+    // context is now set up
+    return container_contains(context->container, val & 0xFFFF, context->typecode);
 }
 
 roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) {
@@ -139,11 +184,12 @@ roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) {
     // todo: could be greatly optimized but we do not expect this call to ever
     // include long lists
     roaring_bitmap_t *answer = roaring_bitmap_create();
+    roaring_bulk_context_t context = {0};
     va_list ap;
     va_start(ap, n_args);
-    for (size_t i = 1; i <= n_args; i++) {
+    for (size_t i = 0; i < n_args; i++) {
         uint32_t val = va_arg(ap, uint32_t);
-        roaring_bitmap_add(answer, val);
+        roaring_bitmap_add_bulk(answer, &context, val);
     }
     va_end(ap);
     return answer;
@@ -1413,14 +1459,24 @@ size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r,
 
 roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) {
     const char *bufaschar = (const char *)buf;
-    if (*(const unsigned char *)buf == CROARING_SERIALIZATION_ARRAY_UINT32) {
+    if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) {
         /* This looks like a compressed set of uint32_t elements */
         uint32_t card;
         memcpy(&card, bufaschar + 1, sizeof(uint32_t));
         const uint32_t *elems =
             (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t));
-
-        return roaring_bitmap_of_ptr(card, elems);
+        roaring_bitmap_t *bitmap = roaring_bitmap_create();
+        if (bitmap == NULL) {
+            return NULL;
+        }
+        roaring_bulk_context_t context = {0};
+        for (uint32_t i = 0; i < card; i++) {
+            // elems may not be aligned, read with memcpy
+            uint32_t elem;
+            memcpy(&elem, elems + i, sizeof(elem));
+            roaring_bitmap_add_bulk(bitmap, &context, elem);
+        }
+        return bitmap;
     } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) {
         return roaring_bitmap_portable_deserialize(bufaschar + 1);
     } else
diff --git a/src/roaring_array.c b/src/roaring_array.c
index 2e1b2c671..5151e7062 100644
--- a/src/roaring_array.c
+++ b/src/roaring_array.c
@@ -319,9 +319,8 @@ extern inline container_t *ra_get_container_at_index(
     const roaring_array_t *ra, uint16_t i,
     uint8_t *typecode);
 
-uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) {
-    return ra->keys[i];
-}
+extern inline uint16_t ra_get_key_at_index(const roaring_array_t *ra,
+                                           uint16_t i);
 
 extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x);
 
diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index 3a4de4b3b..011cc4011 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -57,6 +57,63 @@ DEFINE_TEST(range_contains) {
     roaring_bitmap_free(bm);
 }
 
+DEFINE_TEST(contains_bulk) {
+    roaring_bitmap_t *bm = roaring_bitmap_create();
+    roaring_bulk_context_t context = {0};
+
+    // Ensure checking an empty bitmap is okay
+    assert_true(!roaring_bitmap_contains_bulk(bm, &context, 0));
+    assert_true(!roaring_bitmap_contains_bulk(bm, &context, 0xFFFFFFFF));
+
+    // create RLE container from [0, 1000]
+    roaring_bitmap_add_range_closed(bm, 0, 1000);
+
+    // add array container from 77000
+    for (uint32_t i = 77000; i < 87000; i+=2) {
+        roaring_bitmap_add(bm, i);
+    }
+    // add bitset container from 132000
+    for (uint32_t i = 132000; i < 140000; i+=2) {
+        roaring_bitmap_add(bm, i);
+    }
+
+    roaring_bitmap_add(bm, UINT32_MAX);
+
+    uint32_t values[] = {
+      1000,   // 1
+      1001,   // 0
+      77000,  // 1
+      77001,  // 0
+      77002,  // 1
+      1002,  // 0
+      132000, // 1
+      132001, // 0
+      132002, // 1
+      77003,  // 0
+      UINT32_MAX, // 1
+      UINT32_MAX - 1, // 0
+    };
+    size_t test_count = sizeof(values) / sizeof(values[0]);
+
+    for (size_t i = 0; i < test_count; i++) {
+        roaring_bulk_context_t empty_context = {0};
+        bool expected_contains = roaring_bitmap_contains(bm, values[i]);
+        assert_true(expected_contains == roaring_bitmap_contains_bulk(bm, &empty_context, values[i]));
+        assert_true(expected_contains == roaring_bitmap_contains_bulk(bm, &context, values[i]));
+
+        if (expected_contains) {
+            assert_int_equal(context.key, values[i] >> 16);
+        }
+        if (context.container != NULL) {
+            assert_in_range(context.idx, 0, bm->high_low_container.size - 1);
+            assert_ptr_equal(context.container, bm->high_low_container.containers[context.idx]);
+            assert_int_equal(context.key, bm->high_low_container.keys[context.idx]);
+            assert_int_equal(context.typecode, bm->high_low_container.typecodes[context.idx]);
+        }
+    }
+    roaring_bitmap_free(bm);
+}
+
 DEFINE_TEST(is_really_empty) {
     roaring_bitmap_t *bm = roaring_bitmap_create();
     assert_true(roaring_bitmap_is_empty(bm));
@@ -94,10 +151,6 @@ void can_copy_empty(bool copy_on_write) {
     roaring_bitmap_free(bm2);
 }
 
-
-
-
-
 bool check_serialization(roaring_bitmap_t *bitmap) {
     const int32_t size = roaring_bitmap_portable_size_in_bytes(bitmap);
     char *data = (char *)malloc(size);
@@ -109,7 +162,6 @@ bool check_serialization(roaring_bitmap_t *bitmap) {
     return ret;
 }
 
-
 DEFINE_TEST(issue245) {
     roaring_bitmap_t *bitmap = roaring_bitmap_create();
     const uint32_t targetEntries = 2048;
@@ -888,6 +940,19 @@ DEFINE_TEST(test_addremove) {
     roaring_bitmap_free(bm);
 }
 
+DEFINE_TEST(test_addremove_bulk) {
+    roaring_bitmap_t *bm = roaring_bitmap_create();
+    roaring_bulk_context_t context = {0};
+    for (uint32_t value = 33057; value < 147849; value += 8) {
+        roaring_bitmap_add_bulk(bm, &context, value);
+    }
+    for (uint32_t value = 33057; value < 147849; value += 8) {
+        assert_true(roaring_bitmap_remove_checked(bm, value));
+    }
+    assert_true(roaring_bitmap_is_empty(bm));
+    roaring_bitmap_free(bm);
+}
+
 DEFINE_TEST(test_addremoverun) {
     roaring_bitmap_t *bm = roaring_bitmap_create();
     for (uint32_t value = 33057; value < 147849; value += 8) {
@@ -4182,6 +4247,7 @@ int main() {
         cmocka_unit_test(issue208),
         cmocka_unit_test(issue208b),
         cmocka_unit_test(range_contains),
+        cmocka_unit_test(contains_bulk),
         cmocka_unit_test(inplaceorwide),
         cmocka_unit_test(test_contains_range),
         cmocka_unit_test(check_range_contains_from_end),
@@ -4206,6 +4272,7 @@ int main() {
         cmocka_unit_test(test_maximum_minimum),
         cmocka_unit_test(test_stats),
         cmocka_unit_test(test_addremove),
+        cmocka_unit_test(test_addremove_bulk),
         cmocka_unit_test(test_addremoverun),
         cmocka_unit_test(test_basic_add),
         cmocka_unit_test(test_remove_withrun),

From d56295437c33f3651a7c7d0977d992899da3ea55 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Fri, 26 Aug 2022 16:27:05 -0400
Subject: [PATCH 005/162] Preparing new release

---
 CMakeLists.txt                    | 6 +++---
 include/roaring/roaring_version.h | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3d386aeab..007d911b6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,10 +17,10 @@ if(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSIO
 endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 0)
-set(PROJECT_VERSION_MINOR 6)
+set(PROJECT_VERSION_MINOR 7)
 set(PROJECT_VERSION_PATCH 0)
-set(ROARING_LIB_VERSION "0.6.0" CACHE STRING "Roaring library version")
-set(ROARING_LIB_SOVERSION "4" CACHE STRING "Roaring library soversion")
+set(ROARING_LIB_VERSION "0.7.0" CACHE STRING "Roaring library version")
+set(ROARING_LIB_SOVERSION "5" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
 if(NOT ROARING_EXCEPTIONS)
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 8b37799c6..fe719d5f5 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.6.0"
+#define ROARING_VERSION "0.7.0"
 enum { 
     ROARING_VERSION_MAJOR = 0,
-    ROARING_VERSION_MINOR = 6,
+    ROARING_VERSION_MINOR = 7,
     ROARING_VERSION_REVISION = 0
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From f73706df96d428f1a5e1deb175082b4474c03a14 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Fri, 26 Aug 2022 16:31:42 -0400
Subject: [PATCH 006/162] Update vs17-ci.yml

---
 .github/workflows/vs17-ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/vs17-ci.yml b/.github/workflows/vs17-ci.yml
index cc5b8451b..8d6e0d65e 100644
--- a/.github/workflows/vs17-ci.yml
+++ b/.github/workflows/vs17-ci.yml
@@ -12,7 +12,7 @@ jobs:
         include:
           - {gen: Visual Studio 17 2022, arch: Win32}
           - {gen: Visual Studio 17 2022, arch: x64}
-   steps:
+    steps:
       - name: checkout
         uses: actions/checkout@v2
       - name: Configure
@@ -24,4 +24,4 @@ jobs:
       - name: Run basic tests
         run: |
           cd build
-          ctest -C Release --output-on-failure 
\ No newline at end of file
+          ctest -C Release --output-on-failure 

From 74345633dcb55415e1c12307b90bf70066c0a8c9 Mon Sep 17 00:00:00 2001
From: Zachary Dremann <dremann@gmail.com>
Date: Mon, 29 Aug 2022 11:14:38 -0400
Subject: [PATCH 007/162] Use a single binary search for array contains_range
 (#385)

* Use a single binary search for array contains_range

* Correct logic
---
 include/roaring/containers/array.h | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/include/roaring/containers/array.h b/include/roaring/containers/array.h
index 47bd93185..758703569 100644
--- a/include/roaring/containers/array.h
+++ b/include/roaring/containers/array.h
@@ -369,18 +369,24 @@ void array_container_offset(const array_container_t *c,
 //* Check whether a range of values from range_start (included) to range_end (excluded) is present. */
 static inline bool array_container_contains_range(const array_container_t *arr,
                                                     uint32_t range_start, uint32_t range_end) {
-
+    const int32_t range_count = range_end - range_start;
     const uint16_t rs_included = range_start;
     const uint16_t re_included = range_end - 1;
 
-    const uint16_t *carr = (const uint16_t *) arr->array;
-
-    const int32_t start = advanceUntil(carr, -1, arr->cardinality, rs_included);
-    const int32_t end = advanceUntil(carr, start - 1, arr->cardinality, re_included);
+    // Empty range is always included
+    if (range_count <= 0) {
+        return true;
+    }
+    if (range_count > arr->cardinality) {
+        return false;
+    }
 
-    return (start < arr->cardinality) && (end < arr->cardinality)
-            && (((uint16_t)(end - start)) == re_included - rs_included)
-            && (carr[start] == rs_included) && (carr[end] == re_included);
+    const int32_t start = binarySearch(arr->array, arr->cardinality, rs_included);
+    // If this sorted array contains all items in the range:
+    // * the start item must be found
+    // * the last item in range range_count must exist, and be the expected end value
+    return (start >= 0) && (arr->cardinality >= start + range_count) &&
+           (arr->array[start + range_count - 1] == re_included);
 }
 
 /* Returns the smallest value (assumes not empty) */

From 6c25108834be21735655db1bdde317d7133047f7 Mon Sep 17 00:00:00 2001
From: Eric Sproul <github@nanobyte.org>
Date: Wed, 7 Sep 2022 11:46:57 -0400
Subject: [PATCH 008/162] Fix truncated MIT license text (#389)

Make the copy in license-comment.h match.
---
 LICENSE               | 3 ++-
 src/license-comment.h | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/LICENSE b/LICENSE
index 8f567d348..8b0ad80d7 100644
--- a/LICENSE
+++ b/LICENSE
@@ -232,4 +232,5 @@ PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
 SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
-IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
\ No newline at end of file
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/src/license-comment.h b/src/license-comment.h
index 43d200549..d50fb11d4 100644
--- a/src/license-comment.h
+++ b/src/license-comment.h
@@ -46,7 +46,7 @@
  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
  * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE
+ * DEALINGS IN THE SOFTWARE.
  *
  * SPDX-License-Identifier: MIT
  */

From 6bdbd81ec094f4ffc255b4c95673a6c1f573bbb9 Mon Sep 17 00:00:00 2001
From: Greg Sadetsky <lepetitg@gmail.com>
Date: Wed, 7 Sep 2022 11:47:55 -0400
Subject: [PATCH 009/162] Fix link in Readme (#388)

---
 tests/testdata/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testdata/README.md b/tests/testdata/README.md
index cecbb6981..62e730066 100644
--- a/tests/testdata/README.md
+++ b/tests/testdata/README.md
@@ -1,4 +1,4 @@
 # test data
 
 These bitmaps were generated from Java : 
-https://github.com/RoaringBitmap/RoaringBitmap/blob/master/examples/SerializeToDiskExample.java
+https://github.com/RoaringBitmap/RoaringBitmap/blob/master/examples/src/main/java/SerializeToDiskExample.java

From 697c014af3f3f8d85d42d99739d6436e5b6bb809 Mon Sep 17 00:00:00 2001
From: Soerian Lieve <soerian@fb.com>
Date: Wed, 7 Sep 2022 16:49:36 +0100
Subject: [PATCH 010/162] Add doublechecked::Roaring64Map + tests, fix issues
 found by it (#387)

* Use roaring::Roaring inside namespace

* Check for invalid range in Roaring64Map::flip

* Add doublechecked::Roaring64Map and tests

* Skip empty values in Roaring64Map::isSubset

* Fix an overflow when run_end == uint16_t max

Previously, when run_end == uint16_t max value, this would loop forever.
---
 cpp/roaring64map.hh           |   9 +-
 src/containers/convert.c      |   3 +-
 tests/cpp_random_unit.cpp     | 231 ++++++++++++++-
 tests/cpp_unit.cpp            |  20 ++
 tests/roaring64map_checked.hh | 515 ++++++++++++++++++++++++++++++++++
 5 files changed, 762 insertions(+), 16 deletions(-)
 create mode 100644 tests/roaring64map_checked.hh

diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index 65596f838..1e51824c3 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -18,10 +18,11 @@ A C++ header for 64-bit Roaring Bitmaps, implemented by way of a map of many
 #include <utility>
 
 #include "roaring.hh"
-using roaring::Roaring;
 
 namespace roaring {
 
+using roaring::Roaring;
+
 class Roaring64MapSetBitForwardIterator;
 class Roaring64MapSetBitBiDirectionalIterator;
 
@@ -436,6 +437,9 @@ public:
      */
     bool isSubset(const Roaring64Map &r) const {
         for (const auto &map_entry : roarings) {
+            if (map_entry.second.isEmpty()) {
+                continue;
+            }
             auto roaring_iter = r.roarings.find(map_entry.first);
             if (roaring_iter == r.roarings.cend())
                 return false;
@@ -522,6 +526,9 @@ public:
      * areas outside the range are passed through unchanged.
      */
     void flip(uint64_t range_start, uint64_t range_end) {
+        if (range_start >= range_end) {
+          return;
+        }
         uint32_t start_high = highBytes(range_start);
         uint32_t start_low = lowBytes(range_start);
         uint32_t end_high = highBytes(range_end);
diff --git a/src/containers/convert.c b/src/containers/convert.c
index b60ac4773..a87babff0 100644
--- a/src/containers/convert.c
+++ b/src/containers/convert.c
@@ -101,10 +101,11 @@ container_t *convert_to_bitset_or_array_container(
         for (int rlepos = 0; rlepos < rc->n_runs; ++rlepos) {
             uint16_t run_start = rc->runs[rlepos].value;
             uint16_t run_end = run_start + rc->runs[rlepos].length;
-            for (uint16_t run_value = run_start; run_value <= run_end;
+            for (uint16_t run_value = run_start; run_value < run_end;
                  ++run_value) {
                 answer->array[answer->cardinality++] = run_value;
             }
+            answer->array[answer->cardinality++] = run_end;
         }
         assert(card == answer->cardinality);
         *resulttype = ARRAY_CONTAINER_TYPE;
diff --git a/tests/cpp_random_unit.cpp b/tests/cpp_random_unit.cpp
index 37000ebf5..0cc29c6e1 100644
--- a/tests/cpp_random_unit.cpp
+++ b/tests/cpp_random_unit.cpp
@@ -1,9 +1,10 @@
 //
 // cpp_random_unit.cpp
 //
-// The `roaring_checked.hh` variation of the C++ wrapper for roaring bitmaps
-// keeps a C++ `std::set` in sync with changes made using the object's methods.
-// That class has the same name (Roaring) and is in namespace `doublecheck`.
+// The `roaring_checked.hh` / `roaring64map_checked.hh` variations of the C++
+// wrapper for roaring bitmaps keep a C++ `std::set` in sync with changes made
+// using the object's methods. Those classes have the same names as the plain
+// classes (`Roaring`, `Roaring64Map`) and are in namespace `doublechecked`.
 //
 // This test generates bitsets with randomized content and runs through the
 // various operations with them.
@@ -19,22 +20,23 @@
 // https://www.llvm.org/docs/LibFuzzer.html
 //
 
-#include <type_traits>
 #include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
-#include <iostream>
 
+#include <iostream>
+#include <type_traits>
 #include <vector>
 
 #include "roaring_checked.hh"
 using doublechecked::Roaring;  // so `Roaring` means `doublecheck::Roaring`
+#include "roaring64map_checked.hh"
+using doublechecked::Roaring64Map;
 
 #include "test.h"
 
-
 // The tests can run as long as one wants.  Ideally, the sanitizer options
 // for `address` and `undefined behavior` should be enabled (see the CMake
 // option ROARING_SANITIZE).
@@ -46,12 +48,12 @@ const unsigned long NUM_STEPS = 1000;
 //
 const int NUM_ROARS = 30;
 
-// If we generated data fully at random in the uint32_t space, then sets would
-// be unlikely to intersect very often.  Use a rolling focal point to kind of
-// distribute the values near enough to each other to be likely to interfere.
-//
+// If we generated data fully at random in the uint32_t / uint64_t space, then
+// sets would be unlikely to intersect very often.  Use a rolling focal point to
+// kind of distribute the values near enough to each other to be likely to
+// interfere.
 uint32_t gravity;
-
+uint64_t gravity64;
 
 Roaring make_random_bitset() {
     Roaring r;
@@ -74,7 +76,7 @@ Roaring make_random_bitset() {
 
           case 3: {
             uint32_t start = gravity + (rand() % 50) - 25;
-            r.flip(start, rand() % 50);
+            r.flip(start, start + rand() % 50);
             break; }
 
           case 4: {  // tests remove(), select(), rank()
@@ -97,6 +99,53 @@ Roaring make_random_bitset() {
     return r;
 }
 
+// Builds a doublechecked 64-bit bitset by applying a random number of random
+// add/addRange/removeRange/flip/remove operations near the rolling `gravity64`
+// focal point (which drifts after every operation), then asserts that the
+// `std::set` shadow copy still matches the roaring bitmap.
+Roaring64Map make_random_bitset64() {
+    Roaring64Map r;
+    int num_ops = rand() % 100;
+    for (int i = 0; i < num_ops; ++i) {
+        switch (rand() % 5) {
+            case 0:
+                r.add(gravity64);
+                break;
+
+            case 1: {
+                uint64_t start = gravity64 + (rand() % 50) - 25;
+                r.addRange(start, start + rand() % 100);
+                break; }
+
+            case 2: {
+                uint64_t start = gravity64 + (rand() % 10) - 5;
+                r.removeRange(start, start + rand() % 5);
+                break; }
+
+            case 3: {
+                uint64_t start = gravity64 + (rand() % 50) - 25;
+                r.flip(start, start + rand() % 50);
+                break; }
+
+            case 4: {  // tests remove(), select(), rank()
+                uint64_t card = r.cardinality();
+                if (card != 0) {
+                    uint64_t rnk = rand() % card;
+                    uint64_t element;
+                    assert_true(r.select(rnk, &element));
+                    assert_int_equal(rnk + 1, r.rank(element));
+                    r.remove(element);  // the selected value, not its rank
+                }
+                break; }
+
+            default:
+                assert_true(false);
+        }
+        gravity64 += (rand() % 200) - 100;
+    }
+    assert_true(r.does_std_set_match_roaring());
+    return r;
+}
 
 DEFINE_TEST(sanity_check_doublechecking) {
     Roaring r;
@@ -120,6 +169,26 @@ DEFINE_TEST(sanity_check_doublechecking) {
     assert_true(r.does_std_set_match_roaring());
 }
 
+DEFINE_TEST(sanity_check_doublechecking_64) {
+    Roaring64Map r;
+    while (r.isEmpty()) r = make_random_bitset64();
+
+    // Pick a random element out of the guaranteed non-empty bitset
+    //
+    uint64_t rnk = rand() % r.cardinality();
+    uint64_t element;
+    assert_true(r.select(rnk, &element));
+
+    // Deliberately get check (the std::set) out of sync to ensure match fails
+    //
+    r.check.erase(element);
+    assert_false(r.does_std_set_match_roaring());
+
+    // Put the std::set back in sync so the destructor doesn't assert
+    //
+    r.check.insert(element);
+    assert_true(r.does_std_set_match_roaring());
+}
 
 DEFINE_TEST(random_doublecheck_test) {
     //
@@ -233,7 +302,7 @@ DEFINE_TEST(random_doublecheck_test) {
                 gravity = element;
             }
             uint32_t start = gravity + (rand() % 50) - 25;
-            out.flip(start, rand() % 50);
+            out.flip(start, start + rand() % 50);
             break; }
 
           default:
@@ -286,13 +355,147 @@ DEFINE_TEST(random_doublecheck_test) {
     }
 }
 
+DEFINE_TEST(random_doublecheck_test_64) {
+    //
+    // Make a group of bitsets to choose from when performing operations.
+    //
+    std::vector<Roaring64Map> roars;
+    for (int i = 0; i < NUM_ROARS; ++i)
+        roars.insert(roars.end(), make_random_bitset64());
+
+    for (unsigned long step = 0; step < NUM_STEPS; ++step) {
+        //
+        // Each step modifies the chosen `out` bitset...possibly just
+        // overwriting it completely.
+        //
+        Roaring64Map &out = roars[rand() % NUM_ROARS];
+
+        // The left and right bitsets may be used as inputs for operations.
+        // They can be a reference to the same object as out, or can be
+        // references to each other (which is good to test those conditions).
+        //
+        const Roaring64Map &left = roars[rand() % NUM_ROARS];
+        const Roaring64Map &right = roars[rand() % NUM_ROARS];
+
+#ifdef ROARING_CPP_RANDOM_PRINT_STATUS
+        printf("[%lu]: %llu %llu %llu\n", step,
+               static_cast<unsigned long long>(left.cardinality()),
+               static_cast<unsigned long long>(right.cardinality()),
+               static_cast<unsigned long long>(out.cardinality()));
+#endif
+
+        int op = rand() % 6;
+
+        switch (op) {
+            case 0: {  // AND
+                out = left & right;
+                if (&out != &left) assert_true(out.isSubset(left));
+                if (&out != &right) assert_true(out.isSubset(right));
+                break;
+            }
+
+            case 1: {  // ANDNOT
+                out = left - right;
+                if (&out != &left) assert_true(out.isSubset(left));
+                break;
+            }
+
+            case 2: {  // OR
+                out = left | right;
+                if (&out != &left) assert_true(left.isSubset(out));
+                if (&out != &right) assert_true(right.isSubset(out));
+                break;
+            }
+
+            case 3: {  // XOR
+                out = left ^ right;
+                break;
+            }
+
+            case 4: {  // FASTUNION
+                const Roaring64Map *inputs[3] = {&out, &left, &right};
+                out = Roaring64Map::fastunion(
+                    3, inputs);  // result checked internally
+                break;
+            }
+
+            case 5: {  // FLIP
+                uint64_t card = out.cardinality();
+                if (card != 0) {  // pick gravity point inside set somewhere
+                    uint64_t rnk = rand() % card;
+                    uint64_t element;
+                    assert_true(out.select(rnk, &element));
+                    assert_int_equal(rnk + 1, out.rank(element));
+                    gravity64 = element;
+                }
+                uint64_t start = gravity64 + (rand() % 50) - 25;
+                out.flip(start, start + rand() % 50);
+                break;
+            }
+
+            default:
+                assert_true(false);
+        }
+
+        // Periodically apply a post-processing step to the out bitset
+        //
+        int post = rand() % 15;
+        switch (post) {
+            case 0:
+                out.removeRunCompression();
+                break;
+
+            case 1:
+                out.runOptimize();
+                break;
+
+            case 2:
+                out.shrinkToFit();
+                break;
+
+            default:
+                break;
+        }
+
+        // Explicitly ask if the `std::set` matches the roaring bitmap in out
+        //
+        assert_true(out.does_std_set_match_roaring());
+
+        // Do some arbitrary query operations.  No need to test the results, as
+        // the doublecheck code ensures the `std::set` matches internally.
+        //
+        out.isEmpty();
+        out.minimum();
+        out.maximum();
+        for (int i = -50; i < 50; ++i) {
+            out.contains(gravity64 + i);
+        }
+
+        // When doing random intersections, the tendency is that sets will
+        // lose all their data points over time.  So empty sets are usually
+        // re-seeded with more data, but a few get through to test empty cases.
+        //
+        if (out.isEmpty() && (rand() % 10 != 0)) out = make_random_bitset64();
+    }
+}
 
 int main() {
+    uint64_t seed = time(nullptr);  // seed taken from the current time
+    srand(static_cast<unsigned>(seed));
+    printf("Seed: %llu\n", static_cast<unsigned long long>(seed));
+
     gravity = rand() % 10000;  // starting focal point
 
+    // Make the 64-bit gravity focus around the edge of a 32-bit value to better
+    // test edge cases.
+    gravity64 = (static_cast<uint64_t>(rand()) << 32) + rand() % 20000 - 10000;
+
     const struct CMUnitTest tests[] = {
         cmocka_unit_test(sanity_check_doublechecking),
-        cmocka_unit_test(random_doublecheck_test)};
+        cmocka_unit_test(sanity_check_doublechecking_64),
+        cmocka_unit_test(random_doublecheck_test),
+        cmocka_unit_test(random_doublecheck_test_64),
+    };
 
     return cmocka_run_group_tests(tests, NULL, NULL);
 }
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index f7be3c922..7fe65fadb 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -1121,6 +1121,24 @@ DEFINE_TEST(test_cpp_flip_64) {
     }
 }
 
+DEFINE_TEST(test_cpp_is_subset_64) {
+  Roaring64Map r1 = Roaring64Map::bitmapOf(1, uint64_t(1));
+  Roaring64Map r2 = Roaring64Map::bitmapOf(1, uint64_t(1) << 32);
+  Roaring64Map r3 = r1 & r2;
+  assert_true(r3.isSubset(r1));
+  assert_true(r3.isSubset(r2));
+}
+
+DEFINE_TEST(test_cpp_remove_run_compression) {
+  Roaring r;
+  uint32_t max = (std::numeric_limits<uint32_t>::max)();
+  for (uint32_t i = max - 10; i != 0; ++i) {
+    r.add(i);
+  }
+  r.runOptimize();
+  r.removeRunCompression();
+}
+
 // Returns true on success, false on exception.
 bool test64Deserialize(const std::string& filename) {
     std::ifstream in(TEST_DATA_DIR + filename, std::ios::binary);
@@ -1222,6 +1240,8 @@ int main() {
         cmocka_unit_test(test_issue304),
         cmocka_unit_test(issue_336),
         cmocka_unit_test(issue_372),
+        cmocka_unit_test(test_cpp_is_subset_64),
+        cmocka_unit_test(test_cpp_remove_run_compression),
     };
     return cmocka_run_group_tests(tests, NULL, NULL);
 }
diff --git a/tests/roaring64map_checked.hh b/tests/roaring64map_checked.hh
new file mode 100644
index 000000000..8455d3e9b
--- /dev/null
+++ b/tests/roaring64map_checked.hh
@@ -0,0 +1,515 @@
+//
+// roaring64map_checked.hh
+//
+// PURPOSE:
+//
+// This file implements a class which maintains a `class Roaring64Map` bitset in
+// sync with a C++ `std::set` of 64-bit integers.  It asserts if it ever
+// notices a difference between the result the roaring bitset gives and the
+// result that the set would give.
+//
+// The doublechecked class is a drop-in replacement for the plain C++ class.
+// Hence any codebase that uses that class could act as a test...if it wished.
+//
+// USAGE:
+//
+// The checked class has the same name (Roaring64Map) in `namespace doublechecked`.
+// So switching between versions could be done easily with a command-line
+// `-D` setting for a #define, e.g.:
+//
+//     #ifdef ROARING_DOUBLECHECK_CPP
+//         #include "roaring64map_checked.hh"
+//         using doublechecked::Roaring64Map;
+//     #else
+//         #include "roaring64map.hh"
+//     #endif
+
+#ifndef INCLUDE_ROARING_64_MAP_CHECKED_HH_
+#define INCLUDE_ROARING_64_MAP_CHECKED_HH_
+
+#include <stdarg.h>
+
+#include <algorithm>
+#include <new>
+#include <stdexcept>
+#include <string>
+
+#include <set>  // sorted set, typically a red-black tree implementation
+#include <assert.h>
+
+#define ROARING_CPP_NAMESPACE unchecked  // can't be overridden if global
+#include "roaring64map.hh"  // contains Roaring64Map unchecked class
+
+namespace doublechecked {  // put the checked class in its own namespace
+
+class Roaring64Map {
+  public:  // members public to allow tests access to them
+    roaring::Roaring64Map plain;  // ordinary Roaring64Map bitset wrapper class
+    std::set<uint64_t> check;  // contents kept in sync with `plain`
+
+  public:
+    Roaring64Map() : plain() {
+    }
+
+    Roaring64Map(size_t n, const uint32_t *data) : plain (n, data) {
+        for (size_t i = 0; i < n; ++i)
+            check.insert(data[i]);
+    }
+
+    // Copy-construct both the roaring bitmap and its `std::set` shadow copy.
+    Roaring64Map(const Roaring64Map &r)
+        : plain(r.plain), check(r.check) {
+    }
+
+    // Move-construct both members out of `r` (bitmap and `std::set` shadow).
+    Roaring64Map(Roaring64Map &&r) noexcept
+        : plain(std::move(r.plain)), check(std::move(r.check)) {
+    }
+
+    // This constructor is unique to `doublechecked::Roaring64Map`, for making
+    // a doublechecked version from an unchecked version.  Note that this alone
+    // is somewhat toothless for checking...e.g. running an operation and then
+    // accepting that all the values in it were correct doesn't do much.  So
+    // the results of such constructions should be validated another way.
+    //
+    Roaring64Map(roaring::Roaring64Map &&other_plain) {
+        plain = std::move(other_plain);
+        for (auto value : plain)
+            check.insert(value);
+    }
+
+    // Builds a bitmap from `n` variadic values, each of which must be passed
+    // as a `uint64_t`.  This does not call `::Roaring64Map::bitmapOf()` because
+    // variadics can't forward their parameters; it goes through `add()`.
+    static Roaring64Map bitmapOf(size_t n, ...) {
+        doublechecked::Roaring64Map ans;
+        va_list vl;
+        va_start(vl, n);
+        for (size_t i = 0; i < n; i++) {
+            ans.add(va_arg(vl, uint64_t));  // read full 64-bit values
+        }
+        va_end(vl);
+        return ans;
+    }
+
+    void add(uint32_t x) {
+        plain.add(x);
+        check.insert(x);
+    }
+    void add(uint64_t x) {
+        plain.add(x);
+        check.insert(x);
+    }
+
+    bool addChecked(uint32_t x) {
+        bool ans = plain.addChecked(x);
+        bool was_in_set = check.insert(x).second;  // insert -> pair<iter,bool>
+        assert(ans == was_in_set);
+        (void)was_in_set;  // unused besides assert
+        return ans;
+    }
+    bool addChecked(uint64_t x) {
+        bool ans = plain.addChecked(x);
+        bool was_in_set = check.insert(x).second;  // insert -> pair<iter,bool>
+        assert(ans == was_in_set);
+        (void)was_in_set;  // unused besides assert
+        return ans;
+    }
+
+    void addRange(const uint64_t x, const uint64_t y) {
+        if (x != y) {  // repeat add_range_closed() cast and bounding logic
+            addRangeClosed(x, y - 1);
+        }
+    }
+
+    void addRangeClosed(uint32_t min, uint32_t max) {
+        plain.addRangeClosed(min, max);
+        if (min <= max) {
+            for (uint32_t val = max; val != min - 1; --val)
+                check.insert(val);
+        }
+    }
+    void addRangeClosed(uint64_t min, uint64_t max) {
+        plain.addRangeClosed(min, max);
+        if (min <= max) {
+            for (uint64_t val = max; val != min - 1; --val)
+                check.insert(val);
+        }
+    }
+
+    void addMany(size_t n_args, const uint32_t *vals) {
+        plain.addMany(n_args, vals);
+        for (size_t i = 0; i < n_args; ++i)
+            check.insert(vals[i]);
+    }
+    void addMany(size_t n_args, const uint64_t *vals) {
+        plain.addMany(n_args, vals);
+        for (size_t i = 0; i < n_args; ++i)
+            check.insert(vals[i]);
+    }
+
+    void remove(uint32_t x) {
+        plain.remove(x);
+        check.erase(x);
+    }
+    void remove(uint64_t x) {
+        plain.remove(x);
+        check.erase(x);
+    }
+
+    bool removeChecked(uint32_t x) {
+        bool ans = plain.removeChecked(x);
+        size_t num_removed = check.erase(x);
+        assert(ans == (num_removed == 1));
+        (void)num_removed;  // unused besides assert
+        return ans;
+    }
+    bool removeChecked(uint64_t x) {
+        bool ans = plain.removeChecked(x);
+        size_t num_removed = check.erase(x);
+        assert(ans == (num_removed == 1));
+        (void)num_removed;  // unused besides assert
+        return ans;
+    }
+
+    void removeRange(const uint64_t x, const uint64_t y) {
+        if (x != y) {  // repeat remove_range_closed() cast and bounding logic
+            removeRangeClosed(x, y - 1);
+        }
+    }
+
+    void removeRangeClosed(uint32_t min, uint32_t max) {
+        plain.removeRangeClosed(min, max);
+        if (min <= max) {
+            check.erase(check.lower_bound(min), check.upper_bound(max));
+        }
+    }
+    void removeRangeClosed(uint64_t min, uint64_t max) {
+        plain.removeRangeClosed(min, max);
+        if (min <= max) {
+            check.erase(check.lower_bound(min), check.upper_bound(max));
+        }
+    }
+
+    uint64_t maximum() const {
+        uint64_t ans = plain.maximum();
+        assert(check.empty() ? ans == 0 : ans == *check.rbegin());
+        return ans;
+    }
+
+    uint64_t minimum() const {
+        uint64_t ans = plain.minimum();
+        assert(check.empty()
+            ? ans == (std::numeric_limits<uint64_t>::max)()
+            : ans == *check.begin());
+        return ans;
+    }
+
+    bool contains(uint32_t x) const {
+        bool ans = plain.contains(x);
+        assert(ans == (check.find(x) != check.end()));
+        return ans;
+    }
+    bool contains(uint64_t x) const {
+        bool ans = plain.contains(x);
+        assert(ans == (check.find(x) != check.end()));
+        return ans;
+    }
+
+
+    // This method is exclusive to `doublechecked::Roaring64Map`
+    //
+    bool does_std_set_match_roaring() const {
+        // Walk the roaring bitmap in lockstep with the sorted `std::set`.
+        auto roaring_it = plain.begin();
+        auto roaring_end = plain.end();
+
+        for (const uint64_t expected : check) {
+            // bitmap ran out early, or holds a different value at this position
+            if (roaring_it == roaring_end || *roaring_it != expected)
+                return false;
+            ++roaring_it;
+        }
+
+        return roaring_it == roaring_end;  // bitmap must have no extra values
+    }
+
+    ~Roaring64Map() {
+        assert(does_std_set_match_roaring());  // always check on destructor
+    }
+
+    Roaring64Map &operator=(const Roaring64Map &r) {
+        plain = r.plain;
+        check = r.check;
+        return *this;
+    }
+
+    Roaring64Map &operator=(Roaring64Map &&r) noexcept {
+        plain = std::move(r.plain);
+        check = std::move(r.check);
+        return *this;
+    }
+
+    Roaring64Map &operator&=(const Roaring64Map &r) {
+        plain &= r.plain;
+
+        auto it = check.begin();
+        auto r_it = r.check.begin();
+        while (it != check.end() && r_it != r.check.end()) {
+            if (*it < *r_it) { it = check.erase(it); }
+            else if (*r_it < *it) { ++r_it; }
+            else { ++it; ++r_it; }  // overlapped
+        }
+        check.erase(it, check.end());  // erase rest of check not in r.check
+
+        return *this;
+    }
+
+    Roaring64Map &operator-=(const Roaring64Map &r) {
+        plain -= r.plain;
+
+        for (auto value : r.check)
+            check.erase(value);  // Note std::remove() is not for ordered sets
+
+        return *this;
+    }
+
+    Roaring64Map &operator|=(const Roaring64Map &r) {
+        plain |= r.plain;
+
+        check.insert(r.check.begin(), r.check.end());  // won't add duplicates
+
+        return *this;
+    }
+
+    Roaring64Map &operator^=(const Roaring64Map &r) {
+        plain ^= r.plain;
+
+        auto it = check.begin();
+        auto it_end = check.end();
+        auto r_it = r.check.begin();
+        auto r_it_end = r.check.end();
+        if (it == it_end) { check = r.check; }  // this empty
+        else if (r_it == r_it_end) { }  // r empty
+        else if (*it > *r.check.rbegin() || *r_it > *check.rbegin()) {
+            check.insert(r.check.begin(), r.check.end());  // obvious disjoint
+        } else while (r_it != r_it_end) {  // may overlap
+            if (it == it_end) { check.insert(*r_it); ++r_it; }
+            else if (*it == *r_it) {  // remove overlapping value
+                it = check.erase(it);  // returns *following* iterator
+                ++r_it;
+            }
+            else if (*it < *r_it) { ++it; }  // keep value from this
+            else { check.insert(*r_it); ++r_it; }  // add value from r
+        }
+
+        return *this;
+    }
+
+    void swap(Roaring64Map &r) {
+        std::swap(r.plain, plain);
+        std::swap(r.check, check);
+    }
+
+    uint64_t cardinality() const {
+        uint64_t ans = plain.cardinality();
+        assert(ans == check.size());
+        return ans;
+    }
+
+    bool isEmpty() const {
+        bool ans = plain.isEmpty();
+        assert(ans == check.empty());
+        return ans;
+    }
+
+    bool isSubset(const Roaring64Map &r) const {  // is `this` subset of `r`?
+        bool ans = plain.isSubset(r.plain);
+        assert(ans == std::includes(
+            r.check.begin(), r.check.end(),  // containing range
+            check.begin(), check.end()  // range to test for containment
+        ));
+        return ans;
+    }
+
+    bool isStrictSubset(const Roaring64Map &r) const {  // `this` strict subset of `r`?
+        bool ans = plain.isStrictSubset(r.plain);
+        assert(ans == (std::includes(
+            r.check.begin(), r.check.end(),  // containing range
+            check.begin(), check.end()  // range to test for containment
+        ) && r.check.size() > check.size()));  // strict: `r` must hold more values
+        return ans;
+    }
+
+    void toUint64Array(uint64_t *ans) const {
+        plain.toUint64Array(ans);
+        // TBD: doublecheck
+    }
+
+    bool operator==(const Roaring64Map &r) const {
+        bool ans = (plain == r.plain);
+        assert(ans == (check == r.check));
+        return ans;
+    }
+
+    void flip(uint64_t range_start, uint64_t range_end) {
+        plain.flip(range_start, range_end);
+
+        if (range_start < range_end) {
+            auto hint = check.lower_bound(range_start);  // *hint stays as >= i
+            auto it_end = check.end();
+            for (uint64_t i = range_start; i < range_end; ++i) {
+                if (hint == it_end || *hint > i)  // i not present, so add
+                    check.insert(hint, i);  // leave hint past i
+                else  // *hint == i, must adjust hint and erase
+                    hint = check.erase(hint);  // returns *following* iterator
+            }
+        }
+    }
+
+    bool removeRunCompression() {
+        return plain.removeRunCompression();
+    }
+
+    bool runOptimize() {
+        return plain.runOptimize();
+    }
+
+    size_t shrinkToFit() {
+        return plain.shrinkToFit();
+    }
+
+    void iterate(roaring::api::roaring_iterator64 iterator, void *ptr) const {
+        plain.iterate(iterator, ptr);
+        assert(does_std_set_match_roaring());  // checks equivalent iteration
+    }
+
+    bool select(uint64_t rnk, uint64_t *element) const {
+        bool ans = plain.select(rnk, element);
+
+        auto it = check.begin();
+        auto it_end = check.end();
+        for (uint64_t i = 0; it != it_end && i < rnk; ++i)
+            ++it;
+        assert(ans == (it != it_end) && (ans ? *it == *element : true));
+
+        return ans;
+    }
+
+    uint64_t rank(uint64_t x) const {
+        uint64_t ans = plain.rank(x);
+
+        uint64_t count = 0;
+        auto it = check.begin();
+        auto it_end = check.end();
+        for (; it != it_end && *it <= x; ++it)
+            ++count;
+        assert(ans == count);
+
+        return ans;
+    }
+
+    size_t write(char *buf, bool portable = true) const {
+        return plain.write(buf, portable);
+    }
+
+    static Roaring64Map read(const char *buf, bool portable = true) {
+        auto plain = roaring::Roaring64Map::read(buf, portable);
+        return Roaring64Map(std::move(plain));
+    }
+
+    static Roaring64Map readSafe(const char *buf, size_t maxbytes) {
+        auto plain = roaring::Roaring64Map::readSafe(buf, maxbytes);
+        return Roaring64Map(std::move(plain));
+    }
+
+    size_t getSizeInBytes(bool portable = true) const {
+        return plain.getSizeInBytes(portable);
+    }
+
+    Roaring64Map operator&(const Roaring64Map &o) const {
+        Roaring64Map ans(plain & o.plain);
+
+        Roaring64Map inplace(*this);
+        assert(ans == (inplace &= o));  // validate against in-place version
+
+        return ans;
+    }
+
+    Roaring64Map operator-(const Roaring64Map &o) const {
+        Roaring64Map ans(plain - o.plain);
+
+        Roaring64Map inplace(*this);
+        assert(ans == (inplace -= o));  // validate against in-place version
+
+        return ans;
+    }
+
+    Roaring64Map operator|(const Roaring64Map &o) const {
+        Roaring64Map ans(plain | o.plain);
+
+        Roaring64Map inplace(*this);
+        assert(ans == (inplace |= o));  // validate against in-place version
+
+        return ans;
+    }
+
+    Roaring64Map operator^(const Roaring64Map &o) const {
+        Roaring64Map ans(plain ^ o.plain);
+
+        Roaring64Map inplace(*this);
+        assert(ans == (inplace ^= o));  // validate against in-place version
+
+        return ans;
+    }
+
+    void setCopyOnWrite(bool val) {
+        plain.setCopyOnWrite(val);
+    }
+
+    void printf() const {
+        plain.printf();
+    }
+
+    std::string toString() const {
+        return plain.toString();
+    }
+
+    bool getCopyOnWrite() const {
+        return plain.getCopyOnWrite();
+    }
+
+    static Roaring64Map fastunion(size_t n, const Roaring64Map **inputs) {
+        auto plain_inputs = new const roaring::Roaring64Map*[n];
+        for (size_t i = 0; i < n; ++i)
+            plain_inputs[i] = &inputs[i]->plain;
+        Roaring64Map ans(roaring::Roaring64Map::fastunion(n, plain_inputs));
+        delete[] plain_inputs;
+
+        if (n == 0)
+            assert(ans.cardinality() == 0);
+        else {
+            Roaring64Map temp = *inputs[0];
+            for (size_t i = 1; i < n; ++i)
+                temp |= *inputs[i];
+            assert(temp == ans);
+        }
+
+        return ans;
+    }
+
+    typedef roaring::Roaring64MapSetBitForwardIterator const_iterator;
+
+    const_iterator begin() const {
+        return roaring::Roaring64MapSetBitForwardIterator(plain);
+    }
+
+    const_iterator end() const {
+        // By value: a `static` here would bind to the first caller's `plain`.
+        return roaring::Roaring64MapSetBitForwardIterator(plain, true);
+    }
+};
+
+}  // end `namespace doublechecked`
+
+#endif  // INCLUDE_ROARING_64_MAP_CHECKED_HH_

From b9e137425be17a55eea0635c87f01acefcdeeda1 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Wed, 7 Sep 2022 11:50:47 -0400
Subject: [PATCH 011/162] Preparing release

---
 CMakeLists.txt                    | 4 ++--
 include/roaring/roaring_version.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 007d911b6..7320d9848 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 0)
 set(PROJECT_VERSION_MINOR 7)
-set(PROJECT_VERSION_PATCH 0)
-set(ROARING_LIB_VERSION "0.7.0" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 1)
+set(ROARING_LIB_VERSION "0.7.1" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "5" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index fe719d5f5..33cff59fa 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.7.0"
+#define ROARING_VERSION "0.7.1"
 enum { 
     ROARING_VERSION_MAJOR = 0,
     ROARING_VERSION_MINOR = 7,
-    ROARING_VERSION_REVISION = 0
+    ROARING_VERSION_REVISION = 1
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From c68ae9dd4f5dfc9fe94d1023f7cdeb61a4db0aee Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Wed, 26 Oct 2022 16:51:51 -0400
Subject: [PATCH 012/162] Update README.md

---
 README.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 4589a8cbd..50f2b24d2 100644
--- a/README.md
+++ b/README.md
@@ -651,10 +651,11 @@ https://groups.google.com/forum/#!forum/roaring-bitmaps
 
 # References about Roaring
 
-- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience (to appear) [arXiv:1709.07821](https://arxiv.org/abs/1709.07821)
+- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience Volume 48, Issue 4 April 2018 Pages 867-895
+ [arXiv:1709.07821](https://arxiv.org/abs/1709.07821)
 -  Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin,
 Better bitmap performance with Roaring bitmaps,
 Software: Practice and Experience Volume 46, Issue 5, pages 709–719, May 2016
-http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/realroaring2014.html
-- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience (accepted in 2016, to appear) http://arxiv.org/abs/1603.06549
+http://arxiv.org/abs/1402.6407 
+- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience Volume 46, Issue 11, pages 1547-1569, November 2016 http://arxiv.org/abs/1603.06549
 - Samy Chambi, Daniel Lemire, Robert Godin, Kamel Boukhalfa, Charles Allen, Fangjin Yang, Optimizing Druid with Roaring bitmaps, IDEAS 2016, 2016. http://r-libre.teluq.ca/950/

From a591cf3854713dfe1cb4a2bf159e413b92d95ca6 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Thu, 27 Oct 2022 08:41:57 -0400
Subject: [PATCH 013/162] Update README.md

---
 README.md | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 50f2b24d2..705ddb790 100644
--- a/README.md
+++ b/README.md
@@ -651,11 +651,9 @@ https://groups.google.com/forum/#!forum/roaring-bitmaps
 
 # References about Roaring
 
-- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience Volume 48, Issue 4 April 2018 Pages 867-895
- [arXiv:1709.07821](https://arxiv.org/abs/1709.07821)
+- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience Volume 48, Issue 4 April 2018 Pages 867-895 [arXiv:1709.07821](https://arxiv.org/abs/1709.07821)
 -  Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin,
 Better bitmap performance with Roaring bitmaps,
-Software: Practice and Experience Volume 46, Issue 5, pages 709–719, May 2016
-http://arxiv.org/abs/1402.6407 
-- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience Volume 46, Issue 11, pages 1547-1569, November 2016 http://arxiv.org/abs/1603.06549
+Software: Practice and Experience Volume 46, Issue 5, pages 709–719, May 2016  [arXiv:1402.6407](http://arxiv.org/abs/1402.6407)
+- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience Volume 46, Issue 11, pages 1547-1569, November 2016 [arXiv:1603.06549](http://arxiv.org/abs/1603.06549)
 - Samy Chambi, Daniel Lemire, Robert Godin, Kamel Boukhalfa, Charles Allen, Fangjin Yang, Optimizing Druid with Roaring bitmaps, IDEAS 2016, 2016. http://r-libre.teluq.ca/950/

From 6ef3a39f6db9ae0c9644918e8f8ae839090e8538 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Fri, 4 Nov 2022 11:00:45 -0400
Subject: [PATCH 014/162] Fix for issue 394. (#395)

---
 amalgamation.sh                |  4 ++--
 include/roaring/isadetection.h |  6 ++++++
 include/roaring/portability.h  | 12 ++++++++++--
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/amalgamation.sh b/amalgamation.sh
index 1281384ef..849f7327d 100755
--- a/amalgamation.sh
+++ b/amalgamation.sh
@@ -43,12 +43,12 @@ $SCRIPTPATH/cpp/roaring64map.hh
 "
 
 # internal .h files => These are used in the implementation but aren't part of
-# the API.  They're all embedded at the head of the amalgamated C file, and
+# the API.  They are all embedded at the head of the amalgamated C file, and
 # need to be in this order.
 #
 ALL_PRIVATE_H="
-$SCRIPTPATH/include/roaring/isadetection.h
 $SCRIPTPATH/include/roaring/portability.h
+$SCRIPTPATH/include/roaring/isadetection.h
 $SCRIPTPATH/include/roaring/containers/perfparameters.h
 $SCRIPTPATH/include/roaring/containers/container_defs.h
 $SCRIPTPATH/include/roaring/array_util.h
diff --git a/include/roaring/isadetection.h b/include/roaring/isadetection.h
index cfea20070..732903756 100644
--- a/include/roaring/isadetection.h
+++ b/include/roaring/isadetection.h
@@ -46,9 +46,15 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef ROARING_ISADETECTION_H
 #define ROARING_ISADETECTION_H
 
+// isadetection.h does not define any macro (except for ROARING_ISADETECTION_H).
+
 #include <stdint.h>
 #include <stdbool.h>
 #include <stdlib.h>
+
+// We need portability.h to be included first, see
+// https://github.com/RoaringBitmap/CRoaring/issues/394
+#include <roaring/portability.h>
 #if CROARING_REGULAR_VISUAL_STUDIO
 #include <intrin.h>
 #elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index a72dcf6bc..3f43e97fe 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -46,7 +46,6 @@
 #define _XOPEN_SOURCE 700
 #endif // !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700)
 
-#include "isadetection.h"
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>  // will provide posix_memalign with _POSIX_C_SOURCE as defined above
@@ -327,4 +326,13 @@ static inline int hamming(uint64_t x) {
 #define CROARING_UNTARGET_REGION
 #endif
 
-#endif /* INCLUDE_PORTABILITY_H_ */
+
+// We need portability.h to be included first,
+// but we also always want isadetection.h to be
+// included (right after).
+// See https://github.com/RoaringBitmap/CRoaring/issues/394
+// There is no scenario where we want portability.h to
+// be included, but not isadetection.h: the latter is a
+// strict requirement.
+#include <roaring/isadetection.h> // include it last!
+#endif /* INCLUDE_PORTABILITY_H_ */
\ No newline at end of file

From e6106589a86f7f9dcbef560ffd285f8534bacb3d Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Fri, 4 Nov 2022 11:02:15 -0400
Subject: [PATCH 015/162] Preparing release.

---
 CMakeLists.txt                    | 4 ++--
 include/roaring/roaring_version.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7320d9848..ecd15f912 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 0)
 set(PROJECT_VERSION_MINOR 7)
-set(PROJECT_VERSION_PATCH 1)
-set(ROARING_LIB_VERSION "0.7.1" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 2)
+set(ROARING_LIB_VERSION "0.7.2" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "5" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 33cff59fa..12f856758 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.7.1"
+#define ROARING_VERSION "0.7.2"
 enum { 
     ROARING_VERSION_MAJOR = 0,
     ROARING_VERSION_MINOR = 7,
-    ROARING_VERSION_REVISION = 1
+    ROARING_VERSION_REVISION = 2
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 6e67f976626196c1aa0d435aaccfe070b99457ce Mon Sep 17 00:00:00 2001
From: Corey Kosak <kosak@users.noreply.github.com>
Date: Sun, 6 Nov 2022 14:09:56 -0500
Subject: [PATCH 016/162] Unit test cleanups: change assert -> assert_true, a
 couple other small things (#396)

---
 tests/toplevel_unit.c | 190 +++++++++++++++++++++---------------------
 1 file changed, 95 insertions(+), 95 deletions(-)

diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index 011cc4011..05052ff31 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -31,7 +31,7 @@ static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) {
 }
 
 // arrays expected to both be sorted.
-static int array_equals(uint32_t *a1, int32_t size1, uint32_t *a2,
+static int array_equals(const uint32_t *a1, int32_t size1, const uint32_t *a2,
                         int32_t size2) {
     if (size1 != size2) return 0;
     for (int i = 0; i < size1; ++i) {
@@ -135,24 +135,24 @@ void can_copy_empty(bool copy_on_write) {
     roaring_bitmap_t *bm1 = roaring_bitmap_create();
     roaring_bitmap_set_copy_on_write(bm1, copy_on_write);
     roaring_bitmap_t *bm2 = roaring_bitmap_copy(bm1);
-    assert(roaring_bitmap_get_cardinality(bm1) == 0);
-    assert(roaring_bitmap_get_cardinality(bm2) == 0);
-    assert(roaring_bitmap_is_empty(bm1));
-    assert(roaring_bitmap_is_empty(bm2));
+    assert_true(roaring_bitmap_get_cardinality(bm1) == 0);
+    assert_true(roaring_bitmap_get_cardinality(bm2) == 0);
+    assert_true(roaring_bitmap_is_empty(bm1));
+    assert_true(roaring_bitmap_is_empty(bm2));
     roaring_bitmap_add(bm1, 3);
     roaring_bitmap_add(bm2, 5);
-    assert(roaring_bitmap_get_cardinality(bm1) == 1);
-    assert(roaring_bitmap_get_cardinality(bm2) == 1);
-    assert(roaring_bitmap_contains(bm1,3));
-    assert(roaring_bitmap_contains(bm2,5));
-    assert(!roaring_bitmap_contains(bm2,3));
-    assert(!roaring_bitmap_contains(bm1,5));
+    assert_true(roaring_bitmap_get_cardinality(bm1) == 1);
+    assert_true(roaring_bitmap_get_cardinality(bm2) == 1);
+    assert_true(roaring_bitmap_contains(bm1,3));
+    assert_true(roaring_bitmap_contains(bm2,5));
+    assert_true(!roaring_bitmap_contains(bm2,3));
+    assert_true(!roaring_bitmap_contains(bm1,5));
     roaring_bitmap_free(bm1);
     roaring_bitmap_free(bm2);
 }
 
 bool check_serialization(roaring_bitmap_t *bitmap) {
-    const int32_t size = roaring_bitmap_portable_size_in_bytes(bitmap);
+    const size_t size = roaring_bitmap_portable_size_in_bytes(bitmap);
     char *data = (char *)malloc(size);
     roaring_bitmap_portable_serialize(bitmap, data);
     roaring_bitmap_t *deserializedBitmap = roaring_bitmap_portable_deserialize(data);
@@ -197,7 +197,7 @@ DEFINE_TEST(issue208) {
         roaring_bitmap_add(r, i);
     }
     uint32_t rank = roaring_bitmap_rank(r, 63);
-    assert(rank == 32);
+    assert_true(rank == 32);
     roaring_bitmap_free(r);
 }
 
@@ -212,7 +212,7 @@ DEFINE_TEST(issue208b) {
     for (uint32_t i = 65536 - 64; i < 65536; i++) {
         uint32_t expected = i - (65536 - 64) + 8196 / 2 + 1;
         uint32_t rank = roaring_bitmap_rank(r, i);
-        assert(rank == expected);
+        assert_true(rank == expected);
     }
     roaring_bitmap_free(r);
 }
@@ -245,12 +245,12 @@ void can_add_to_copies(bool copy_on_write) {
     roaring_bitmap_set_copy_on_write(bm1, copy_on_write);
     roaring_bitmap_add(bm1, 3);
     roaring_bitmap_t *bm2 = roaring_bitmap_copy(bm1);
-    assert(roaring_bitmap_get_cardinality(bm1) == 1);
-    assert(roaring_bitmap_get_cardinality(bm2) == 1);
+    assert_true(roaring_bitmap_get_cardinality(bm1) == 1);
+    assert_true(roaring_bitmap_get_cardinality(bm2) == 1);
     roaring_bitmap_add(bm2, 4);
     roaring_bitmap_add(bm1, 5);
-    assert(roaring_bitmap_get_cardinality(bm1) == 2);
-    assert(roaring_bitmap_get_cardinality(bm2) == 2);
+    assert_true(roaring_bitmap_get_cardinality(bm1) == 2);
+    assert_true(roaring_bitmap_get_cardinality(bm2) == 2);
     roaring_bitmap_free(bm1);
     roaring_bitmap_free(bm2);
 }
@@ -271,7 +271,7 @@ void convert_all_containers(roaring_bitmap_t* r, uint8_t dst_type) {
             r->high_low_container.containers[i] = dst_container;
             r->high_low_container.typecodes[i] = ARRAY_CONTAINER_TYPE;
         }
-        assert(r->high_low_container.typecodes[i] == ARRAY_CONTAINER_TYPE);
+        assert_true(r->high_low_container.typecodes[i] == ARRAY_CONTAINER_TYPE);
 
         // second step: convert ARRAY to dst_type
         if (dst_type == BITSET_CONTAINER_TYPE) {
@@ -287,7 +287,7 @@ void convert_all_containers(roaring_bitmap_t* r, uint8_t dst_type) {
             r->high_low_container.containers[i] = dst_container;
             r->high_low_container.typecodes[i] = RUN_CONTAINER_TYPE;
         }
-        assert(r->high_low_container.typecodes[i] == dst_type);
+        assert_true(r->high_low_container.typecodes[i] == dst_type);
     }
 }
 
@@ -519,7 +519,7 @@ DEFINE_TEST(check_iterate_to_end) {
     roaring_init_iterator(r1, &iterator);
     uint64_t count = 0;
     while(iterator.has_value) {
-      assert(iterator.current_value + (s - count) == bignumber);
+      assert_true(iterator.current_value + (s - count) == bignumber);
       count++;
       roaring_advance_uint32_iterator(&iterator);
     }
@@ -539,7 +539,7 @@ DEFINE_TEST(check_iterate_to_beginning) {
         uint64_t count = 0;
         while(iterator.has_value) {
             count++;
-            assert(iterator.current_value + count == bignumber);
+            assert_true(iterator.current_value + count == bignumber);
             roaring_previous_uint32_iterator(&iterator);
         }
         assert_true(count == s);
@@ -645,14 +645,14 @@ void test_example(bool copy_on_write) {
     // we can also go in reverse and go from arrays to bitmaps
     uint64_t card1 = roaring_bitmap_get_cardinality(r1);
     uint32_t *arr1 = (uint32_t *)malloc(card1 * sizeof(uint32_t));
-    assert(arr1 != NULL);
+    assert_true(arr1 != NULL);
     roaring_bitmap_to_uint32_array(r1, arr1);
 
     // we can go from arrays to bitmaps from "offset" by "limit"
     size_t offset = 100;
     size_t limit = 1000;
     uint32_t *arr3 = (uint32_t *)malloc(limit * sizeof(uint32_t));
-    assert(arr3 != NULL);
+    assert_true(arr3 != NULL);
     roaring_bitmap_range_uint32_array(r1, offset, limit, arr3);
     free(arr3);
 
@@ -721,10 +721,10 @@ void test_example(bool copy_on_write) {
     roaring_bitmap_free(t);
      // we can also check whether there is a bitmap at a memory location without reading it
     size_t sizeofbitmap = roaring_bitmap_portable_deserialize_size(serializedbytes,expectedsize);
-    assert(sizeofbitmap == expectedsize);  // sizeofbitmap would be zero if no bitmap were found
+    assert_true(sizeofbitmap == expectedsize);  // sizeofbitmap would be zero if no bitmap were found
     // we can also read the bitmap "safely" by specifying a byte size limit:
     t = roaring_bitmap_portable_deserialize_safe(serializedbytes,expectedsize);
-    assert(roaring_bitmap_equals(r1, t));  // what we recover is equal
+    assert_true(roaring_bitmap_equals(r1, t));  // what we recover is equal
     roaring_bitmap_free(t);
     free(serializedbytes);
 
@@ -904,19 +904,19 @@ void can_remove_from_copies(bool copy_on_write) {
     roaring_bitmap_set_copy_on_write(bm1, copy_on_write);
     roaring_bitmap_add(bm1, 3);
     roaring_bitmap_t *bm2 = roaring_bitmap_copy(bm1);
-    assert(roaring_bitmap_get_cardinality(bm1) == 1);
-    assert(roaring_bitmap_get_cardinality(bm2) == 1);
+    assert_true(roaring_bitmap_get_cardinality(bm1) == 1);
+    assert_true(roaring_bitmap_get_cardinality(bm2) == 1);
     roaring_bitmap_add(bm2, 4);
     roaring_bitmap_add(bm1, 5);
-    assert(roaring_bitmap_get_cardinality(bm1) == 2);
-    assert(roaring_bitmap_get_cardinality(bm2) == 2);
+    assert_true(roaring_bitmap_get_cardinality(bm1) == 2);
+    assert_true(roaring_bitmap_get_cardinality(bm2) == 2);
     roaring_bitmap_remove(bm1, 5);
-    assert(roaring_bitmap_get_cardinality(bm1) == 1);
+    assert_true(roaring_bitmap_get_cardinality(bm1) == 1);
     roaring_bitmap_remove(bm1, 4);
-    assert(roaring_bitmap_get_cardinality(bm1) == 1);
-    assert(roaring_bitmap_get_cardinality(bm2) == 2);
+    assert_true(roaring_bitmap_get_cardinality(bm1) == 1);
+    assert_true(roaring_bitmap_get_cardinality(bm2) == 2);
     roaring_bitmap_remove(bm2, 4);
-    assert(roaring_bitmap_get_cardinality(bm2) == 1);
+    assert_true(roaring_bitmap_get_cardinality(bm2) == 1);
     roaring_bitmap_free(bm1);
     roaring_bitmap_free(bm2);
 }
@@ -1265,8 +1265,8 @@ DEFINE_TEST(test_portable_serialize) {
     arr2 = (uint32_t *)malloc(card2 * sizeof(uint32_t));
     roaring_bitmap_to_uint32_array(r2, arr2);
 
-    assert(array_equals(arr1, card1, arr2, card2));
-    assert(roaring_bitmap_equals(r1, r2));
+    assert_true(array_equals(arr1, card1, arr2, card2));
+    assert_true(roaring_bitmap_equals(r1, r2));
     free(arr1);
     free(arr2);
     free(serialized);
@@ -1531,9 +1531,9 @@ DEFINE_TEST(test_intersection_array_x_array) {
 
 DEFINE_TEST(test_intersection_array_x_array_inplace) {
     roaring_bitmap_t *r1 = roaring_bitmap_create();
-    assert(r1);
+    assert_true(r1);
     roaring_bitmap_t *r2 = roaring_bitmap_create();
-    assert(r2);
+    assert_true(r2);
 
     for (uint32_t i = 0; i < 100; ++i) {
         roaring_bitmap_add(r1, 2 * i);
@@ -1554,9 +1554,9 @@ DEFINE_TEST(test_intersection_array_x_array_inplace) {
 
 DEFINE_TEST(test_intersection_bitset_x_bitset) {
     roaring_bitmap_t *r1 = roaring_bitmap_create();
-    assert(r1);
+    assert_true(r1);
     roaring_bitmap_t *r2 = roaring_bitmap_create();
-    assert(r2);
+    assert_true(r2);
 
     for (uint32_t i = 0; i < 20000; ++i) {
         roaring_bitmap_add(r1, 2 * i);
@@ -1586,9 +1586,9 @@ DEFINE_TEST(test_intersection_bitset_x_bitset) {
 
 DEFINE_TEST(test_intersection_bitset_x_bitset_inplace) {
     roaring_bitmap_t *r1 = roaring_bitmap_create();
-    assert(r1);
+    assert_true(r1);
     roaring_bitmap_t *r2 = roaring_bitmap_create();
-    assert(r2);
+    assert_true(r2);
 
     for (uint32_t i = 0; i < 20000; ++i) {
         roaring_bitmap_add(r1, 2 * i);
@@ -1612,10 +1612,10 @@ DEFINE_TEST(test_intersection_bitset_x_bitset_inplace) {
 void test_union(bool copy_on_write) {
     roaring_bitmap_t *r1 = roaring_bitmap_create();
     roaring_bitmap_set_copy_on_write(r1, copy_on_write);
-    assert(r1);
+    assert_true(r1);
     roaring_bitmap_t *r2 = roaring_bitmap_create();
     roaring_bitmap_set_copy_on_write(r2, copy_on_write);
-    assert(r2);
+    assert_true(r2);
 
     for (uint32_t i = 0; i < 100; ++i) {
         roaring_bitmap_add(r1, 2 * i);
@@ -1652,8 +1652,8 @@ static roaring_bitmap_t *gen_bitmap(double start_density,
     for (int i = 0; i < universe_size; i += run_length) {
         d = start_density + i * density_gradient;
         double r = our_rand() / (double)OUR_RAND_MAX;
-        assert(r <= 1.0);
-        assert(r >= 0);
+        assert_true(r <= 1.0);
+        assert_true(r >= 0);
         if (r < d && !(i >= blank_range_start && i < blank_range_end))
             for (int j = 0; j < run_length; ++j) roaring_bitmap_add(ans, i + j);
     }
@@ -2611,7 +2611,7 @@ DEFINE_TEST(test_bitset_to_run) {
     }
 
     roaring_bitmap_t *r1 = make_roaring_from_array(ans, ans_ctr);
-    assert(roaring_bitmap_run_optimize(r1));
+    assert_true(roaring_bitmap_run_optimize(r1));
 
     uint64_t card = roaring_bitmap_get_cardinality(r1);
     uint32_t *arr = (uint32_t *)malloc(card * sizeof(uint32_t));
@@ -3197,8 +3197,8 @@ DEFINE_TEST(test_rand_flips) {
             double f3 = our_rand() / (double)OUR_RAND_MAX;
             int pos = (int)(f1 * f2 * f3 *
                             range);  // denser at the start, sparser at end
-            assert(pos < range);
-            assert(pos >= 0);
+            assert_true(pos < range);
+            assert_true(pos >= 0);
             roaring_bitmap_add(r, pos);
             input[pos] = 1;
         }
@@ -3253,8 +3253,8 @@ DEFINE_TEST(test_inplace_rand_flips) {
             double f3 = our_rand() / (double)OUR_RAND_MAX;
             int pos = (int)(f1 * f2 * f3 *
                             range);  // denser at the start, sparser at end
-            assert(pos < range);
-            assert(pos >= 0);
+            assert_true(pos < range);
+            assert_true(pos >= 0);
             roaring_bitmap_add(r, pos);
             input[pos] = 1;
         }
@@ -3340,7 +3340,7 @@ DEFINE_TEST(select_test) {
             double f3 = our_rand() / (double)OUR_RAND_MAX;
             uint32_t pos = (uint32_t)(f1 * f2 * f3 *
                             range);  // denser at the start, sparser at end
-            assert(pos < range);
+            assert_true(pos < range);
             roaring_bitmap_add(r, pos);
             input[pos] = 1;
         }
@@ -3646,7 +3646,7 @@ void test_iterator_generate_data(uint32_t **values_out, uint32_t *count_out) {
     // max allowed value
     values[count++] = UINT32_MAX;
 
-    assert(count <= capacity);
+    assert_true(count <= capacity);
     *values_out = values;
     *count_out = count;
 }
@@ -3660,8 +3660,8 @@ void read_compare(roaring_bitmap_t* r, const uint32_t* ref_values, uint32_t ref_
     uint32_t* buffer = (uint32_t*)malloc(
             sizeof(uint32_t) * (step == UINT32_MAX ? 65536 : step));
     while (ref_count > 0) {
-        assert(iter->has_value == true);
-        assert(iter->current_value == ref_values[0]);
+        assert_true(iter->has_value == true);
+        assert_true(iter->current_value == ref_values[0]);
 
         uint32_t num_ask = step;
         if (step == UINT32_MAX) {
@@ -3676,20 +3676,20 @@ void read_compare(roaring_bitmap_t* r, const uint32_t* ref_values, uint32_t ref_
         }
 
         uint32_t num_got = roaring_read_uint32_iterator(iter, buffer, num_ask);
-        assert(num_got == minimum_uint32(num_ask, ref_count));
+        assert_true(num_got == minimum_uint32(num_ask, ref_count));
         for (uint32_t i = 0; i < num_got; i++) {
-            assert(ref_values[i] == buffer[i]);
+            assert_true(ref_values[i] == buffer[i]);
         }
         ref_values += num_got;
         ref_count -= num_got;
     }
 
-    assert(iter->has_value == false);
-    assert(iter->current_value == UINT32_MAX);
+    assert_true(iter->has_value == false);
+    assert_true(iter->current_value == UINT32_MAX);
 
-    assert(roaring_read_uint32_iterator(iter, buffer, step) == 0);
-    assert(iter->has_value == false);
-    assert(iter->current_value == UINT32_MAX);
+    assert_true(roaring_read_uint32_iterator(iter, buffer, step) == 0);
+    assert_true(iter->has_value == false);
+    assert_true(iter->current_value == UINT32_MAX);
 
     free(buffer);
     roaring_free_uint32_iterator(iter);
@@ -3750,13 +3750,13 @@ void test_previous_iterator(uint8_t type) {
     uint32_t count = 0;
 
     do {
-        assert(iterator.has_value);
+        assert_true(iterator.has_value);
         ++count;
-        assert((int64_t)ref_count - (int64_t)count >= 0); // sanity check
-        assert(ref_values[ref_count - count] == iterator.current_value);
+        assert_true((int64_t)ref_count - (int64_t)count >= 0); // sanity check
+        assert_true(ref_values[ref_count - count] == iterator.current_value);
     } while (roaring_previous_uint32_iterator(&iterator));
 
-    assert(ref_count == count);
+    assert_true(ref_count == count);
 
     roaring_bitmap_free(r);
     free(ref_values);
@@ -3795,11 +3795,11 @@ void test_iterator_reuse_retry_count(int retry_count){
     }
 
     // sanity checks
-    assert(roaring_bitmap_contains(with_edges, 0));
-    assert(roaring_bitmap_contains(with_edges, UINT32_MAX));
-    assert(!roaring_bitmap_contains(without_edges, 0));
-    assert(!roaring_bitmap_contains(without_edges, UINT32_MAX));
-    assert(roaring_bitmap_get_cardinality(with_edges) - 2 == roaring_bitmap_get_cardinality(without_edges));
+    assert_true(roaring_bitmap_contains(with_edges, 0));
+    assert_true(roaring_bitmap_contains(with_edges, UINT32_MAX));
+    assert_true(!roaring_bitmap_contains(without_edges, 0));
+    assert_true(!roaring_bitmap_contains(without_edges, UINT32_MAX));
+    assert_true(roaring_bitmap_get_cardinality(with_edges) - 2 == roaring_bitmap_get_cardinality(without_edges));
 
     const roaring_bitmap_t* bitmaps[] = {with_edges, without_edges};
     int num_bitmaps = sizeof(bitmaps) / sizeof(bitmaps[0]);
@@ -3807,7 +3807,7 @@ void test_iterator_reuse_retry_count(int retry_count){
     for (int i = 0; i < num_bitmaps; ++i){
         roaring_uint32_iterator_t iterator;
         roaring_init_iterator(bitmaps[i], &iterator);
-        assert(iterator.has_value);
+        assert_true(iterator.has_value);
         uint32_t first_value = iterator.current_value;
 
         uint32_t count = 0;
@@ -3815,7 +3815,7 @@ void test_iterator_reuse_retry_count(int retry_count){
             count++;
             roaring_advance_uint32_iterator(&iterator);
         }
-        assert(count == roaring_bitmap_get_cardinality(bitmaps[i]));
+        assert_true(count == roaring_bitmap_get_cardinality(bitmaps[i]));
 
         // Test advancing the iterator more times than necessary
         for (int retry = 0; retry < retry_count; ++retry) {
@@ -3829,7 +3829,7 @@ void test_iterator_reuse_retry_count(int retry_count){
             count++;
             roaring_previous_uint32_iterator(&iterator);
         }
-        assert(count == roaring_bitmap_get_cardinality(bitmaps[i]));
+        assert_true(count == roaring_bitmap_get_cardinality(bitmaps[i]));
 
         // Test decrement the iterator more times than necessary
         for (int retry = 0; retry < retry_count; ++retry) {
@@ -3837,8 +3837,8 @@ void test_iterator_reuse_retry_count(int retry_count){
         }
 
         roaring_advance_uint32_iterator(&iterator);
-        assert(iterator.has_value);
-        assert(first_value == iterator.current_value);
+        assert_true(iterator.has_value);
+        assert_true(first_value == iterator.current_value);
     }
 
 
@@ -4006,8 +4006,8 @@ DEFINE_TEST(test_add_range) {
         roaring_bitmap_set_copy_on_write(r1, true);
         roaring_bitmap_t *r2 = roaring_bitmap_copy(r1);
         roaring_bitmap_add_range(r1, 0, 1);
-        assert(roaring_bitmap_get_cardinality(r1) == 1);
-        assert(roaring_bitmap_get_cardinality(r2) == 1);
+        assert_true(roaring_bitmap_get_cardinality(r1) == 1);
+        assert_true(roaring_bitmap_get_cardinality(r2) == 1);
         roaring_bitmap_free(r2);
         roaring_bitmap_free(r1);
     }
@@ -4157,9 +4157,9 @@ DEFINE_TEST(test_remove_many) {
         sbs_add_range(sbs, 0, 65535);
         for (uint32_t v = 0; v <= 65535; v++) {
             sbs_remove_many(sbs, 1, &v);
-            assert(roaring_bitmap_get_cardinality(sbs->roaring) == 65535-v);
+            assert_true(roaring_bitmap_get_cardinality(sbs->roaring) == 65535-v);
         }
-        assert(sbs_is_empty(sbs));
+        assert_true(sbs_is_empty(sbs));
         sbs_free(sbs);
     }
 
@@ -4172,22 +4172,22 @@ DEFINE_TEST(test_range_cardinality) {
     roaring_bitmap_add_range(r, s*2, s*10);
 
     // single container (minhb == maxhb)
-    assert(roaring_bitmap_range_cardinality(r, s*2, s*3) == s);
-    assert(roaring_bitmap_range_cardinality(r, s*2+100, s*3) == s-100);
-    assert(roaring_bitmap_range_cardinality(r, s*2, s*3-200) == s-200);
-    assert(roaring_bitmap_range_cardinality(r, s*2+100, s*3-200) == s-300);
+    assert_true(roaring_bitmap_range_cardinality(r, s*2, s*3) == s);
+    assert_true(roaring_bitmap_range_cardinality(r, s*2+100, s*3) == s-100);
+    assert_true(roaring_bitmap_range_cardinality(r, s*2, s*3-200) == s-200);
+    assert_true(roaring_bitmap_range_cardinality(r, s*2+100, s*3-200) == s-300);
 
     // multiple containers (maxhb > minhb)
-    assert(roaring_bitmap_range_cardinality(r, s*2, s*5) == s*3);
-    assert(roaring_bitmap_range_cardinality(r, s*2+100, s*5) == s*3-100);
-    assert(roaring_bitmap_range_cardinality(r, s*2, s*5-200) == s*3-200);
-    assert(roaring_bitmap_range_cardinality(r, s*2+100, s*5-200) == s*3-300);
+    assert_true(roaring_bitmap_range_cardinality(r, s*2, s*5) == s*3);
+    assert_true(roaring_bitmap_range_cardinality(r, s*2+100, s*5) == s*3-100);
+    assert_true(roaring_bitmap_range_cardinality(r, s*2, s*5-200) == s*3-200);
+    assert_true(roaring_bitmap_range_cardinality(r, s*2+100, s*5-200) == s*3-300);
 
     // boundary checks
-    assert(roaring_bitmap_range_cardinality(r, s*20, s*21) == 0);
-    assert(roaring_bitmap_range_cardinality(r, 100, 100) == 0);
-    assert(roaring_bitmap_range_cardinality(r, 0, s*7) == s*5);
-    assert(roaring_bitmap_range_cardinality(r, s*7, UINT64_MAX) == s*3);
+    assert_true(roaring_bitmap_range_cardinality(r, s*20, s*21) == 0);
+    assert_true(roaring_bitmap_range_cardinality(r, 100, 100) == 0);
+    assert_true(roaring_bitmap_range_cardinality(r, 0, s*7) == s*5);
+    assert_true(roaring_bitmap_range_cardinality(r, s*7, UINT64_MAX) == s*3);
 
     roaring_bitmap_free(r);
 }
@@ -4200,8 +4200,8 @@ void frozen_serialization_compare(roaring_bitmap_t *r1) {
     const roaring_bitmap_t *r2 =
         roaring_bitmap_frozen_view(buf, num_bytes);
 
-    assert(roaring_bitmap_equals(r1, r2));
-    assert(roaring_bitmap_frozen_view(buf+1, num_bytes-1) == NULL);
+    assert_true(roaring_bitmap_equals(r1, r2));
+    assert_true(roaring_bitmap_frozen_view(buf+1, num_bytes-1) == NULL);
 
     roaring_bitmap_free(r1);
     roaring_bitmap_free(r2);
@@ -4232,7 +4232,7 @@ DEFINE_TEST(test_frozen_serialization_max_containers) {
     for (int64_t i = 0; i < 65536; i++) {
         roaring_bitmap_add(r, 65536 * i);
     }
-    assert(r->high_low_container.size == 65536);
+    assert_true(r->high_low_container.size == 65536);
     frozen_serialization_compare(r);
 }
 

From 690341827477d47c47910986f4070b3276fe01b9 Mon Sep 17 00:00:00 2001
From: Corey Kosak <kosak@users.noreply.github.com>
Date: Mon, 7 Nov 2022 14:30:04 -0500
Subject: [PATCH 017/162] Improve efficiency of outer map operations for
 Roaring64Map (#390)

* Improve efficiency of overloaded operators of outer map operations

* typo

* Change to snake case
---
 cpp/roaring64map.hh | 237 ++++++++++++++++++++++++++++++++++++++------
 tests/cpp_unit.cpp  |  98 ++++++++++++++++++
 2 files changed, 304 insertions(+), 31 deletions(-)

diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index 1e51824c3..a769317cf 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -309,16 +309,58 @@ public:
     }
 
     /**
-     * Compute the intersection between the current bitmap and the provided
-     * bitmap, writing the result in the current bitmap. The provided bitmap
-     * is not modified.
+     * Compute the intersection of the current bitmap and the provided bitmap,
+     * writing the result in the current bitmap. The provided bitmap is not
+     * modified.
      */
-    Roaring64Map &operator&=(const Roaring64Map &r) {
-        for (auto &map_entry : roarings) {
-            if (r.roarings.count(map_entry.first) == 1)
-                map_entry.second &= r.roarings.at(map_entry.first);
-            else
-                map_entry.second = Roaring();
+    Roaring64Map &operator&=(const Roaring64Map &other) {
+        if (this == &other) {
+            // ANDing *this with itself is a no-op.
+            return *this;
+        }
+
+        // Logic table summarizing what to do when a given outer key is
+        // present vs. absent from self and other.
+        //
+        // self     other    (self & other)  work to do
+        // --------------------------------------------
+        // absent   absent   empty           None
+        // absent   present  empty           None
+        // present  absent   empty           Erase self
+        // present  present  empty or not    Intersect self with other, but
+        //                                   erase self if result is empty.
+        //
+        // Because there is only work to do when a key is present in 'self', the
+        // main for loop iterates over entries in 'self'.
+
+        decltype(roarings.begin()) self_next;
+        for (auto self_iter = roarings.begin(); self_iter != roarings.end();
+             self_iter = self_next) {
+            // Do the 'next' operation now, so we don't have to worry about
+            // invalidation of self_iter down below with the 'erase' operation.
+            self_next = std::next(self_iter);
+
+            auto self_key = self_iter->first;
+            auto &self_bitmap = self_iter->second;
+
+            auto other_iter = other.roarings.find(self_key);
+            if (other_iter == other.roarings.end()) {
+                // 'other' doesn't have self_key. In the logic table above,
+                // this reflects the case (self.present & other.absent).
+                // So, erase self.
+                roarings.erase(self_iter);
+                continue;
+            }
+
+            // Both sides have self_key. In the logic table above, this reflects
+            // the case (self.present & other.present). So, intersect self with
+            // other.
+            const auto &other_bitmap = other_iter->second;
+            self_bitmap &= other_bitmap;
+            if (self_bitmap.isEmpty()) {
+                // ...but if intersection is empty, remove it altogether.
+                roarings.erase(self_iter);
+            }
         }
         return *this;
     }
@@ -328,44 +370,177 @@ public:
      * bitmap, writing the result in the current bitmap. The provided bitmap
      * is not modified.
      */
-    Roaring64Map &operator-=(const Roaring64Map &r) {
-        for (auto &map_entry : roarings) {
-            if (r.roarings.count(map_entry.first) == 1)
-                map_entry.second -= r.roarings.at(map_entry.first);
+    Roaring64Map &operator-=(const Roaring64Map &other) {
+        if (this == &other) {
+            // Subtracting *this from itself results in the empty map.
+            roarings.clear();
+            return *this;
+        }
+
+        // Logic table summarizing what to do when a given outer key is
+        // present vs. absent from self and other.
+        //
+        // self     other    (self - other)  work to do
+        // --------------------------------------------
+        // absent   absent   empty           None
+        // absent   present  empty           None
+        // present  absent   unchanged       None
+        // present  present  empty or not    Subtract other from self, but
+        //                                   erase self if result is empty
+        //
+        // Because there is only work to do when a key is present in both 'self'
+        // and 'other', the main while loop ping-pongs back and forth until it
+        // finds the next key that is the same on both sides.
+
+        auto self_iter = roarings.begin();
+        auto other_iter = other.roarings.cbegin();
+
+        while (self_iter != roarings.end() &&
+               other_iter != other.roarings.cend()) {
+            auto self_key = self_iter->first;
+            auto other_key = other_iter->first;
+            if (self_key < other_key) {
+                // Because self_key is < other_key, advance self_iter to the
+                // first point where self_key >= other_key (or end).
+                self_iter = roarings.lower_bound(other_key);
+                continue;
+            }
+
+            if (self_key > other_key) {
+                // Because self_key is > other_key, advance other_iter to the
+                // first point where other_key >= self_key (or end).
+                other_iter = other.roarings.lower_bound(self_key);
+                continue;
+            }
+
+            // Both sides have self_key. In the logic table above, this reflects
+            // the case (self.present & other.present). So subtract other from
+            // self.
+            auto &self_bitmap = self_iter->second;
+            const auto &other_bitmap = other_iter->second;
+            self_bitmap -= other_bitmap;
+
+            if (self_bitmap.isEmpty()) {
+                // ...but if subtraction is empty, remove it altogether.
+                self_iter = roarings.erase(self_iter);
+            } else {
+                ++self_iter;
+            }
+            ++other_iter;
         }
         return *this;
     }
 
     /**
-     * Compute the union between the current bitmap and the provided bitmap,
+     * Compute the union of the current bitmap and the provided bitmap,
      * writing the result in the current bitmap. The provided bitmap is not
      * modified.
      *
      * See also the fastunion function to aggregate many bitmaps more quickly.
      */
-    Roaring64Map &operator|=(const Roaring64Map &r) {
-        for (const auto &map_entry : r.roarings) {
-            if (roarings.count(map_entry.first) == 0) {
-                roarings[map_entry.first] = map_entry.second;
-                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
-            } else
-                roarings[map_entry.first] |= map_entry.second;
+    Roaring64Map &operator|=(const Roaring64Map &other) {
+        if (this == &other) {
+            // ORing *this with itself is a no-op.
+            return *this;
+        }
+
+        // Logic table summarizing what to do when a given outer key is
+        // present vs. absent from self and other.
+        //
+        // self     other    (self | other)  work to do
+        // --------------------------------------------
+        // absent   absent   empty           None
+        // absent   present  not empty       Copy other to self and set flags
+        // present  absent   unchanged       None
+        // present  present  not empty       self |= other
+        //
+        // Because there is only work to do when a key is present in 'other',
+        // the main for loop iterates over entries in 'other'.
+
+        for (const auto &other_entry : other.roarings) {
+            const auto &other_bitmap = other_entry.second;
+
+            // Try to insert other_bitmap into self at other_key. We take
+            // advantage of the fact that std::map::insert will not overwrite an
+            // existing entry.
+            auto insert_result = roarings.insert(other_entry);
+            auto self_iter = insert_result.first;
+            auto insert_happened = insert_result.second;
+            auto &self_bitmap = self_iter->second;
+
+            if (insert_happened) {
+                // Key was not present in self, so insert was performed above.
+                // In the logic table above, this reflects the case
+                // (self.absent | other.present). Because the copy has already
+                // happened, thanks to the 'insert' operation above, we just
+                // need to set the copyOnWrite flag.
+                self_bitmap.setCopyOnWrite(copyOnWrite);
+                continue;
+            }
+
+            // Both sides have self_key, and the insert was not performed. In
+            // the logic table above, this reflects the case
+            // (self.present & other.present). So OR other into self.
+            self_bitmap |= other_bitmap;
         }
         return *this;
     }
 
     /**
-     * Compute the symmetric union between the current bitmap and the provided
-     * bitmap, writing the result in the current bitmap. The provided bitmap
-     * is not modified.
+     * Compute the XOR of the current bitmap and the provided bitmap, writing
+     * the result in the current bitmap. The provided bitmap is not modified.
      */
-    Roaring64Map &operator^=(const Roaring64Map &r) {
-        for (const auto &map_entry : r.roarings) {
-            if (roarings.count(map_entry.first) == 0) {
-                roarings[map_entry.first] = map_entry.second;
-                roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
-            } else
-                roarings[map_entry.first] ^= map_entry.second;
+    Roaring64Map &operator^=(const Roaring64Map &other) {
+        if (this == &other) {
+            // XORing *this with itself results in the empty map.
+            roarings.clear();
+            return *this;
+        }
+
+        // Logic table summarizing what to do when a given outer key is
+        // present vs. absent from self and other.
+        //
+        // self     other    (self ^ other)  work to do
+        // --------------------------------------------
+        // absent   absent   empty           None
+        // absent   present  non-empty       Copy other to self and set flags
+        // present  absent   unchanged       None
+        // present  present  empty or not    XOR other into self, but erase self
+        //                                   if result is empty.
+        //
+        // Because there is only work to do when a key is present in 'other',
+        // the main for loop iterates over entries in 'other'.
+
+        for (const auto &other_entry : other.roarings) {
+            const auto &other_bitmap = other_entry.second;
+
+            // Try to insert other_bitmap into self at other_key. We take
+            // advantage of the fact that std::map::insert will not overwrite an
+            // existing entry.
+            auto insert_result = roarings.insert(other_entry);
+            auto self_iter = insert_result.first;
+            auto insert_happened = insert_result.second;
+            auto &self_bitmap = self_iter->second;
+
+            if (insert_happened) {
+                // Key was not present in self, so insert was performed above.
+                // In the logic table above, this reflects the case
+                // (self.absent ^ other.present). Because the copy has already
+                // happened, thanks to the 'insert' operation above, we just
+                // need to set the copyOnWrite flag.
+                self_bitmap.setCopyOnWrite(copyOnWrite);
+                continue;
+            }
+
+            // Both sides have self_key, and the insert was not performed. In
+            // the logic table above, this reflects the case
+            // (self.present ^ other.present). So XOR other into self.
+            self_bitmap ^= other_bitmap;
+
+            if (self_bitmap.isEmpty()) {
+                // ...but if the XOR result is empty, remove it altogether.
+                roarings.erase(self_iter);
+            }
         }
         return *this;
     }
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index 7fe65fadb..f01557456 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -10,6 +10,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
+#include <random>
 #include <vector>
 
 
@@ -25,6 +26,8 @@ using roaring::Roaring;  // the C++ wrapper class
 #include "roaring64map.hh"
 using roaring::Roaring64Map;  // C++ class extended for 64-bit numbers
 
+#include "roaring64map_checked.hh"
+
 #include "test.h"
 
 static_assert(std::is_nothrow_move_constructible<Roaring>::value,
@@ -828,6 +831,97 @@ DEFINE_TEST(test_cpp_remove_range_64) {
     }
 }
 
+std::pair<doublechecked::Roaring64Map, doublechecked::Roaring64Map>
+    make_two_big_roaring64_maps() {
+    // Insert a large number of pseudorandom numbers into two sets.
+    const uint32_t randomSeed = 0xdeadbeef;
+    const size_t numValues = 1000000;  // 1 million
+
+    doublechecked::Roaring64Map roaring1;
+    doublechecked::Roaring64Map roaring2;
+
+    std::default_random_engine engine(randomSeed);
+    std::uniform_int_distribution<uint64_t> rng;
+
+    for (size_t i = 0; i < numValues; ++i) {
+        auto value = rng(engine);
+        auto choice = rng(engine) % 4;
+        switch (choice) {
+            case 0: {
+                // Value is added only to set 1.
+                roaring1.add(value);
+                break;
+            }
+
+            case 1: {
+                // Value is added only to set 2.
+                roaring2.add(value);
+                break;
+            }
+
+            case 2: {
+                // Value is added to both sets.
+                roaring1.add(value);
+                roaring2.add(value);
+                break;
+            }
+
+            case 3: {
+                // Value is added to set 1, and a slightly different value
+                // is added to set 2. This makes it likely that they are in
+                // the same "outer" bin, but at a different "inner" position.
+                roaring1.add(value);
+                roaring2.add(value + 1);
+                break;
+            }
+
+            default:
+                assert_true(false);
+        }
+    }
+    return std::make_pair(std::move(roaring1), std::move(roaring2));
+}
+
+DEFINE_TEST(test_cpp_union_64) {
+    auto two_maps = make_two_big_roaring64_maps();
+
+    auto &lhs = two_maps.first;
+    const auto &rhs = two_maps.second;
+
+    lhs |= rhs;
+    assert_true(lhs.does_std_set_match_roaring());
+}
+
+DEFINE_TEST(test_cpp_intersect_64) {
+    auto two_maps = make_two_big_roaring64_maps();
+
+    auto &lhs = two_maps.first;
+    const auto &rhs = two_maps.second;
+
+    lhs &= rhs;
+    assert_true(lhs.does_std_set_match_roaring());
+}
+
+DEFINE_TEST(test_cpp_difference_64) {
+    auto two_maps = make_two_big_roaring64_maps();
+
+    auto &lhs = two_maps.first;
+    const auto &rhs = two_maps.second;
+
+    lhs -= rhs;
+    assert_true(lhs.does_std_set_match_roaring());
+}
+
+DEFINE_TEST(test_cpp_xor_64) {
+    auto two_maps = make_two_big_roaring64_maps();
+
+    auto &lhs = two_maps.first;
+    const auto &rhs = two_maps.second;
+
+    lhs ^= rhs;
+    assert_true(lhs.does_std_set_match_roaring());
+}
+
 DEFINE_TEST(test_cpp_clear_64) {
     Roaring64Map roaring;
 
@@ -1218,6 +1312,10 @@ int main() {
         cmocka_unit_test(test_run_compression_cpp_64_false),
         cmocka_unit_test(test_run_compression_cpp_true),
         cmocka_unit_test(test_run_compression_cpp_false),
+        cmocka_unit_test(test_cpp_union_64),
+        cmocka_unit_test(test_cpp_intersect_64),
+        cmocka_unit_test(test_cpp_difference_64),
+        cmocka_unit_test(test_cpp_xor_64),
         cmocka_unit_test(test_cpp_clear_64),
         cmocka_unit_test(test_cpp_move_64),
         cmocka_unit_test(test_roaring64_iterate_multi_roaring),

From df61baee67c8ac6393c39993d0138552992de0f5 Mon Sep 17 00:00:00 2001
From: Uku Raudvere <u.raudvere@gmail.com>
Date: Tue, 8 Nov 2022 15:39:54 +0200
Subject: [PATCH 018/162] Use ISO 8601 UTC dates as amalgamation timestamps
 (#401)

---
 amalgamation.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/amalgamation.sh b/amalgamation.sh
index 849f7327d..ed3e54000 100755
--- a/amalgamation.sh
+++ b/amalgamation.sh
@@ -5,7 +5,7 @@
 ########################################################################
 SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
 
-timestamp=$(date)  # capture to label files with their generation time
+timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")  # capture to label files with their generation time
 
 function newline {
     echo ""

From d569c3cc5abd679860d6096db8c9a6bb87508b25 Mon Sep 17 00:00:00 2001
From: Corey Kosak <kosak@users.noreply.github.com>
Date: Tue, 8 Nov 2022 17:08:34 -0500
Subject: [PATCH 019/162] Improve remove-type operations (#398)

* Improve remove-type operations

* Respond to review feedback.

* snake case
---
 cpp/roaring64map.hh           | 161 ++++++++++++++++++++++++++++------
 tests/cpp_unit.cpp            |  89 ++++++++++++++++++-
 tests/roaring64map_checked.hh |  27 ++++--
 3 files changed, 244 insertions(+), 33 deletions(-)

diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index a769317cf..6dfe6ec35 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -182,31 +182,73 @@ public:
     }
 
     /**
-     * Remove value x
+     * Removes value x.
+     */
+    void remove(uint32_t x) {
+        auto iter = roarings.begin();
+        // Since x is a uint32_t, highbytes(x) == 0. The inner bitmap we are
+        // looking for, if it exists, will be at the first slot of 'roarings'.
+        if (iter == roarings.end() || iter->first != 0) {
+            return;
+        }
+        auto &bitmap = iter->second;
+        bitmap.remove(x);
+        eraseIfEmpty(iter);
+    }
+
+    /**
+     * Removes value x.
      */
-    void remove(uint32_t x) { roarings[0].remove(x); }
     void remove(uint64_t x) {
-        auto roaring_iter = roarings.find(highBytes(x));
-        if (roaring_iter != roarings.cend())
-            roaring_iter->second.remove(lowBytes(x));
+        auto iter = roarings.find(highBytes(x));
+        if (iter == roarings.end()) {
+            return;
+        }
+        auto &bitmap = iter->second;
+        bitmap.remove(lowBytes(x));
+        eraseIfEmpty(iter);
     }
 
     /**
-     * Remove value x
-     * Returns true if a new value was removed, false if the value was not existing.
+     * Removes value x.
+     * Returns true if the value was removed, false if the value was not
+     * present.
      */
     bool removeChecked(uint32_t x) {
-        return roarings[0].removeChecked(x);
+        auto iter = roarings.begin();
+        // Since x is a uint32_t, highbytes(x) == 0. The inner bitmap we are
+        // looking for, if it exists, will be at the first slot of 'roarings'.
+        if (iter == roarings.end() || iter->first != 0) {
+            return false;
+        }
+        auto &bitmap = iter->second;
+        if (!bitmap.removeChecked(x)) {
+            return false;
+        }
+        eraseIfEmpty(iter);
+        return true;
     }
+
+    /**
+     * Removes value x.
+     * Returns true if the value was removed, false if the value was not
+     * present.
+     */
     bool removeChecked(uint64_t x) {
-        auto roaring_iter = roarings.find(highBytes(x));
-        if (roaring_iter != roarings.cend())
-            return roaring_iter->second.removeChecked(lowBytes(x));
-        return false;
+        auto iter = roarings.find(highBytes(x));
+        if (iter == roarings.end()) {
+            return false;
+        }
+        auto &bitmap = iter->second;
+        if (!bitmap.removeChecked(lowBytes(x))) {
+            return false;
+        }
+        eraseIfEmpty(iter);
+        return true;
     }
 
     /**
-     * Remove all values in range [min, max)
+     * Removes all values in the half-open interval [min, max).
      */
     void removeRange(uint64_t min, uint64_t max) {
         if (min >= max) {
@@ -216,11 +258,24 @@ public:
     }
 
     /**
-     * Remove all values in range [min, max]
+     * Removes all values in the closed interval [min, max].
      */
     void removeRangeClosed(uint32_t min, uint32_t max) {
-        return roarings[0].removeRangeClosed(min, max);
+        auto iter = roarings.begin();
+        // Since min and max are uint32_t, highbytes(min or max) == 0. The inner
+        // bitmap we are looking for, if it exists, will be at the first slot of
+        // 'roarings'.
+        if (iter == roarings.end() || iter->first != 0) {
+            return;
+        }
+        auto &bitmap = iter->second;
+        bitmap.removeRangeClosed(min, max);
+        eraseIfEmpty(iter);
     }
+
+    /**
+     * Removes all values in the closed interval [min, max].
+     */
     void removeRangeClosed(uint64_t min, uint64_t max) {
         if (min > max) {
             return;
@@ -230,35 +285,75 @@ public:
         uint32_t end_high = highBytes(max);
         uint32_t end_low = lowBytes(max);
 
+        // We put std::numeric_limits<>::max in parentheses to avoid a
+        // clash with the Windows.h header under Windows.
+        const uint32_t uint32_max = (std::numeric_limits<uint32_t>::max)();
+
+        // If the outer map is empty, end_high is less than the first key,
+        // or start_high is greater than the last key, then exit now because
+        // there is no work to do.
         if (roarings.empty() || end_high < roarings.cbegin()->first ||
             start_high > (roarings.crbegin())->first) {
             return;
         }
 
+        // If we get here, start_iter points to the first entry in the outer map
+        // with key >= start_high. Such an entry is known to exist (i.e. the
+        // iterator will not be equal to end()) because start_high <= the last
+        // key in the map (thanks to the above if statement).
         auto start_iter = roarings.lower_bound(start_high);
+        // end_iter points to the first entry in the outer map with
+        // key >= end_high, if such a key exists. Otherwise, it equals end().
         auto end_iter = roarings.lower_bound(end_high);
+
+        // Note that the 'lower_bound' method will find the start and end slots,
+        // if they exist; otherwise it will find the next-higher slots.
+        // In the case where 'start' landed on an existing slot, we need to do a
+        // partial erase of that slot, and likewise for 'end'. But all the slots
+        // in between can be fully erased. More precisely:
+        //
+        // 1. If the start point falls on an existing entry, there are two
+        //    subcases:
+        //    a. if the end point falls on that same entry, remove the closed
+        //       interval [start_low, end_low] from that entry and we are done.
+        //    b. Otherwise, remove the closed interval [start_low, uint32_max]
+        //       from that entry, advance start_iter, and fall through to step 2.
+        // 2. Completely erase all slots in the half-open interval
+        //    [start_iter, end_iter)
+        // 3. If the end point falls on an existing entry, remove the closed
+        //    interval [0, end_low] from it.
+
+        // Step 1. If the start point falls on an existing entry...
         if (start_iter->first == start_high) {
+            auto &start_inner = start_iter->second;
+            // 1a. if the end point falls on that same entry...
             if (start_iter == end_iter) {
-                start_iter->second.removeRangeClosed(start_low, end_low);
+                start_inner.removeRangeClosed(start_low, end_low);
+                eraseIfEmpty(start_iter);
                 return;
             }
-            // we put std::numeric_limits<>::max/min in parenthesis
-            // to avoid a clash with the Windows.h header under Windows
-            start_iter->second.removeRangeClosed(
-                start_low, (std::numeric_limits<uint32_t>::max)());
-            start_iter++;
+
+            // 1b. Otherwise, remove the closed range [start_low, uint32_max]...
+            start_inner.removeRangeClosed(start_low, uint32_max);
+            // Advance start_iter, but keep the old value so we can check the
+            // bitmap we just modified for emptiness and erase it if necessary.
+            auto temp = start_iter++;
+            eraseIfEmpty(temp);
         }
 
+        // 2. Completely erase all slots in the half-open interval...
         roarings.erase(start_iter, end_iter);
 
-        if (end_iter != roarings.cend() && end_iter->first == end_high) {
-            end_iter->second.removeRangeClosed(
-                (std::numeric_limits<uint32_t>::min)(), end_low);
+        // 3. If the end point falls on an existing entry...
+        if (end_iter != roarings.end() && end_iter->first == end_high) {
+            auto &end_inner = end_iter->second;
+            end_inner.removeRangeClosed(0, end_low);
+            eraseIfEmpty(end_iter);
         }
     }
 
     /**
-     * Clear the bitmap
+     * Clears the bitmap.
      */
     void clear() {
         roarings.clear();
@@ -1225,7 +1320,8 @@ public:
     const_iterator end() const;
 
 private:
-    std::map<uint32_t, Roaring> roarings{}; // The empty constructor silences warnings from pedantic static analyzers.
+    typedef std::map<uint32_t, Roaring> roarings_t;
+    roarings_t roarings{}; // The empty constructor silences warnings from pedantic static analyzers.
     bool copyOnWrite{false};
     static uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); }
     static uint32_t lowBytes(const uint64_t in) { return uint32_t(in); }
@@ -1250,6 +1346,17 @@ private:
         roarings.emplace(key, std::move(value));
 #endif
     }
+
+    /**
+     * Erases the entry pointed to by 'iter' from the 'roarings' map, but only
+     * if its bitmap is empty. Warning: an erase invalidates 'iter'.
+     */
+    void eraseIfEmpty(roarings_t::iterator iter) {
+        const auto &bitmap = iter->second;
+        if (bitmap.isEmpty()) {
+            roarings.erase(iter);
+        }
+    }
 };
 
 /**
@@ -1259,7 +1366,7 @@ class Roaring64MapSetBitForwardIterator {
 public:
     typedef std::forward_iterator_tag iterator_category;
     typedef uint64_t *pointer;
-    typedef uint64_t &reference_type;
+    typedef uint64_t &reference;
     typedef uint64_t value_type;
     typedef int64_t difference_type;
     typedef Roaring64MapSetBitForwardIterator type_of_iterator;
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index f01557456..a2f7598d1 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -10,6 +10,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
+#include <algorithm>
 #include <random>
 #include <vector>
 
@@ -229,6 +230,67 @@ void test_roaring64_iterate_multi_roaring(void) {
     assert_true(iterate_count == 2);
 }
 
+namespace {
+bool roaringEqual(const Roaring64Map &actual,
+                  std::initializer_list<uint64_t> expected) {
+    return expected.size() == actual.cardinality() &&
+           std::equal(expected.begin(), expected.end(), actual.begin());
+}
+}  // namespace
+
+DEFINE_TEST(test_roaring64_remove_32) {
+    Roaring64Map roaring;
+
+    // A specific test to make sure we don't get slots confused.
+    // Specifically, we make Roaring64Map with only one slot (namely slot 5)
+    // with values {100, 200, 300} in its inner bitmap. Then we do a 32-bit
+    // remove of 100 from slot 0. A correct implementation of 'remove' would
+    // be a no-op.
+    const uint64_t b5 = uint64_t(5) << 32;
+    Roaring64Map r;
+    r.add(b5 + 100);
+    r.add(b5 + 200);
+    r.add(b5 + 300);
+    r.remove(uint32_t(100));
+
+    // No change
+    assert_true(roaringEqual(r, {b5 + 100, b5 + 200, b5 + 300}));
+}
+
+DEFINE_TEST(test_roaring64_add_and_remove) {
+    Roaring64Map r;
+
+    const uint64_t b5 = uint64_t(5) << 32;
+
+    // 32-bit adds
+    r.add(300u);
+    r.add(200u);
+    r.add(100u);
+    assert_true(roaringEqual(r, {100, 200, 300}));
+
+    // 64-bit adds
+    r.add(uint64_t(200));  // Duplicate
+    r.add(uint64_t(400));  // New
+    r.add(b5 + 400);  // All new
+    r.add(b5 + 300);
+    r.add(b5 + 200);
+    r.add(b5 + 100);
+    assert_true(roaringEqual(r,
+        {100, 200, 300, 400, b5 + 100, b5 + 200, b5 + 300, b5 + 400}));
+
+    // 32-bit removes
+    r.remove(200u);  // Exists.
+    r.remove(500u);  // Doesn't exist
+    assert_true(roaringEqual(r,
+        {100, 300, 400, b5 + 100, b5 + 200, b5 + 300, b5 + 400}));
+
+    // 64-bit removes
+    r.remove(b5 + 100);  // Exists.
+    r.remove(b5 + 500);  // Doesn't exist
+    assert_true(roaringEqual(r,
+        {100, 300, 400, b5 + 200, b5 + 300, b5 + 400}));
+}
+
 DEFINE_TEST(test_roaring64_iterate_multi_roaring) {
     test_roaring64_iterate_multi_roaring();
 }
@@ -736,7 +798,7 @@ DEFINE_TEST(test_cpp_add_range_64) {
     }
 }
 
-DEFINE_TEST(test_cpp_remove_range_64) {
+DEFINE_TEST(test_cpp_remove_range_closed_64) {
     {
         // 32-bit integers
         Roaring64Map r1 =
@@ -831,6 +893,28 @@ DEFINE_TEST(test_cpp_remove_range_64) {
     }
 }
 
+DEFINE_TEST(test_cpp_remove_range_64) {
+    // Because removeRange delegates to removeRangeClosed, we do most of the
+    // unit testing in test_cpp_remove_range_closed_64(). We just do a couple of
+    // sanity checks here.
+    Roaring64Map r1;
+    auto b5 = uint64_t(5) << 32;
+
+    auto uint64_max = std::numeric_limits<uint64_t>::max();
+
+    r1.add(0u);  // 32-bit add
+    r1.add(b5 + 1000);  // arbitrary 64 bit add
+    r1.add(b5 + 1001);  // arbitrary 64 bit add
+    r1.add(uint64_max - 1000);
+    r1.add(uint64_max);  // highest possible bit
+
+    // Half-open interval: result should be the set {0, uint64_max}
+    r1.removeRange(1, uint64_max);
+
+    Roaring64Map r2 = Roaring64Map::bitmapOf(2, uint64_t(0), uint64_max);
+    assert_true(r1 == r2);
+}
+
 std::pair<doublechecked::Roaring64Map, doublechecked::Roaring64Map>
     make_two_big_roaring64_maps() {
     // Insert a large number of pseudorandom numbers into two sets.
@@ -1307,6 +1391,7 @@ int main() {
         cmocka_unit_test(test_cpp_add_range),
         cmocka_unit_test(test_cpp_remove_range),
         cmocka_unit_test(test_cpp_add_range_64),
+        cmocka_unit_test(test_cpp_remove_range_closed_64),
         cmocka_unit_test(test_cpp_remove_range_64),
         cmocka_unit_test(test_run_compression_cpp_64_true),
         cmocka_unit_test(test_run_compression_cpp_64_false),
@@ -1319,6 +1404,8 @@ int main() {
         cmocka_unit_test(test_cpp_clear_64),
         cmocka_unit_test(test_cpp_move_64),
         cmocka_unit_test(test_roaring64_iterate_multi_roaring),
+        cmocka_unit_test(test_roaring64_remove_32),
+        cmocka_unit_test(test_roaring64_add_and_remove),
         cmocka_unit_test(test_cpp_bidirectional_iterator_64),
         cmocka_unit_test(test_cpp_frozen),
         cmocka_unit_test(test_cpp_frozen_64),
diff --git a/tests/roaring64map_checked.hh b/tests/roaring64map_checked.hh
index 8455d3e9b..2a3c97cb7 100644
--- a/tests/roaring64map_checked.hh
+++ b/tests/roaring64map_checked.hh
@@ -172,22 +172,39 @@ class Roaring64Map {
         return ans;
     }
 
-    void removeRange(const uint64_t x, const uint64_t y) {
-        if (x != y) {  // repeat remove_range_closed() cast and bounding logic
-            removeRangeClosed(x, y - 1);
+    void removeRange(const uint64_t min, const uint64_t max) {
+        plain.removeRange(min, max);
+        if (min < max) {
+            // Points to the first entry with key >= min, or end
+            auto start = check.lower_bound(min);
+            // Points to the first entry with key >= max, or end.
+            auto end = check.lower_bound(max);
+            // Removes the half-open interval [start, end) (i.e. does not include max).
+            check.erase(start, end);
         }
     }
 
     void removeRangeClosed(uint32_t min, uint32_t max) {
         plain.removeRangeClosed(min, max);
         if (min <= max) {
-            check.erase(check.lower_bound(min), check.upper_bound(max));
+            // Points to the first entry with key >= min, or end
+            auto start = check.lower_bound(min);
+            // Points to the first entry with key > max, or end.
+            auto end = check.upper_bound(max);
+            // Removes the half-open interval [start, end) (i.e. includes max).
+            check.erase(start, end);
         }
     }
+
     void removeRangeClosed(uint64_t min, uint64_t max) {
         plain.removeRangeClosed(min, max);
         if (min <= max) {
-            check.erase(check.lower_bound(min), check.upper_bound(max));
+            // Points to the first entry with key >= min, or end
+            auto start = check.lower_bound(min);
+            // Points to the first entry with key > max, or end.
+            auto end = check.upper_bound(max);
+            // Removes the half-open interval [start, end) (i.e. includes max).
+            check.erase(start, end);
         }
     }
 

From cea238c86c1334c1dce99f95dc070612cf63815e Mon Sep 17 00:00:00 2001
From: Corey Kosak <kosak@users.noreply.github.com>
Date: Tue, 8 Nov 2022 19:46:01 -0500
Subject: [PATCH 020/162] Improve the code readability of
 Roaring64Map::printf() and Roaring64Map::toString() (#399)

---
 cpp/roaring64map.hh | 120 ++++++++++++++++----------------------------
 tests/cpp_unit.cpp  |  38 ++++++++++++++
 2 files changed, 81 insertions(+), 77 deletions(-)

diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index 6dfe6ec35..0af6f35f7 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -5,10 +5,12 @@ A C++ header for 64-bit Roaring Bitmaps, implemented by way of a map of many
 #ifndef INCLUDE_ROARING_64_MAP_HH_
 #define INCLUDE_ROARING_64_MAP_HH_
 
+#include <inttypes.h>
 #include <algorithm>
 #include <cstdarg>  // for va_list handling in bitmapOf()
 #include <cstdio>  // for std::printf() in the printf() method
 #include <cstring>  // for std::memcpy()
+#include <functional>
 #include <limits>
 #include <map>
 #include <new>
@@ -1193,90 +1195,27 @@ public:
     }
 
     /**
-     * Print the content of the bitmap
+     * Print the contents of the bitmap to stdout.
+     * Note: this method adds a final newline, but toString() does not.
      */
     void printf() const {
-        if (!isEmpty()) {
-            auto map_iter = roarings.cbegin();
-            while (map_iter->second.isEmpty()) ++map_iter;
-            struct iter_data {
-                uint32_t high_bits{};
-                char first_char{'{'};
-            } outer_iter_data;
-            outer_iter_data.high_bits = roarings.begin()->first;
-            map_iter->second.iterate(
-                [](uint32_t low_bits, void *inner_iter_data) -> bool {
-                    std::printf("%c%llu",
-                                ((iter_data *)inner_iter_data)->first_char,
-                                (long long unsigned)uniteBytes(
-                                    ((iter_data *)inner_iter_data)->high_bits,
-                                    low_bits));
-                    ((iter_data *)inner_iter_data)->first_char = ',';
-                    return true;
-                },
-                (void *)&outer_iter_data);
-            std::for_each(
-                ++map_iter, roarings.cend(),
-                [](const std::pair<const uint32_t, Roaring> &map_entry) {
-                    map_entry.second.iterate(
-                        [](uint32_t low_bits, void *high_bits) -> bool {
-                            std::printf(",%llu",
-                                        (long long unsigned)uniteBytes(
-                                            *(uint32_t *)high_bits, low_bits));
-                            return true;
-                        },
-                        (void *)&map_entry.first);
-                });
-        } else
-            std::printf("{");
-        std::printf("}\n");
+        auto sink = [](const std::string &s) {
+            fputs(s.c_str(), stdout);
+        };
+        printToSink(sink);
+        sink("\n");
     }
 
     /**
-     * Print the content of the bitmap into a string
+     * Print the contents of the bitmap into a string.
      */
     std::string toString() const {
-        struct iter_data {
-            std::string str{}; // The empty constructor silences warnings from pedantic static analyzers.
-            uint32_t high_bits{0};
-            char first_char{'{'};
-        } outer_iter_data;
-        if (!isEmpty()) {
-            auto map_iter = roarings.cbegin();
-            while (map_iter->second.isEmpty()) ++map_iter;
-            outer_iter_data.high_bits = roarings.begin()->first;
-            map_iter->second.iterate(
-                [](uint32_t low_bits, void *inner_iter_data) -> bool {
-                    ((iter_data *)inner_iter_data)->str +=
-                        ((iter_data *)inner_iter_data)->first_char;
-                    ((iter_data *)inner_iter_data)->str += std::to_string(
-                        uniteBytes(((iter_data *)inner_iter_data)->high_bits,
-                                   low_bits));
-                    ((iter_data *)inner_iter_data)->first_char = ',';
-                    return true;
-                },
-                (void *)&outer_iter_data);
-            std::for_each(
-                ++map_iter, roarings.cend(),
-                [&outer_iter_data](
-                    const std::pair<const uint32_t, Roaring> &map_entry) {
-                    outer_iter_data.high_bits = map_entry.first;
-                    map_entry.second.iterate(
-                        [](uint32_t low_bits, void *inner_iter_data) -> bool {
-                            ((iter_data *)inner_iter_data)->str +=
-                                ((iter_data *)inner_iter_data)->first_char;
-                            ((iter_data *)inner_iter_data)->str +=
-                                std::to_string(uniteBytes(
-                                    ((iter_data *)inner_iter_data)->high_bits,
-                                    low_bits));
-                            return true;
-                        },
-                        (void *)&outer_iter_data);
-                });
-        } else
-            outer_iter_data.str = '{';
-        outer_iter_data.str += '}';
-        return outer_iter_data.str;
+        std::string result;
+        auto sink = [&result](const std::string &s) {
+            result += s;
+        };
+        printToSink(sink);
+        return result;
     }
 
     /**
@@ -1347,6 +1286,33 @@ private:
 #endif
     }
 
+    /**
+     * Prints the contents of the bitmap to a caller-provided sink function.
+     */
+    void printToSink(const std::function<void(const std::string &)> &sink) const {
+        sink("{");
+
+        // Storage for snprintf. Big enough to store the decimal representation
+        // of the largest uint64_t value and trailing \0.
+        char buffer[32];
+        const char *separator = "";
+        // Reusable, and therefore avoids many repeated heap allocations.
+        std::string callback_string;
+        for (const auto &entry : roarings) {
+            auto high_bits = entry.first;
+            const auto &bitmap = entry.second;
+            for (const auto low_bits : bitmap) {
+                auto value = uniteBytes(high_bits, low_bits);
+                snprintf(buffer, sizeof(buffer), "%" PRIu64, value);
+                callback_string = separator;
+                callback_string.append(buffer);
+                sink(callback_string);
+                separator = ",";
+            }
+        }
+        sink("}");
+    }
+
     /**
      * Erases the entry pointed to by 'iter' from the 'roarings' map. Warning:
      * this invalidates 'iter'.
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index a2f7598d1..7ff47a3ce 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -1307,6 +1307,43 @@ DEFINE_TEST(test_cpp_is_subset_64) {
   assert_true(r3.isSubset(r2));
 }
 
+DEFINE_TEST(test_cpp_to_string) {
+    // test toString
+    const auto b5 = uint64_t(5) << 32;
+    const auto uint32_max = std::numeric_limits<uint32_t>::max();
+    const auto uint64_max = std::numeric_limits<uint64_t>::max();
+
+    {
+        // 32-bit test.
+        Roaring a;
+        assert_string_equal("{}", a.toString().c_str());
+
+        a.add(1);
+        assert_string_equal("{1}", a.toString().c_str());
+
+        a.add(2);
+        a.add(3);
+        a.add(uint32_max);
+        assert_string_equal("{1,2,3,4294967295}", a.toString().c_str());
+    }
+
+    {
+        // 64-bit test.
+        Roaring64Map r;
+        assert_string_equal("{}", r.toString().c_str());
+
+        r.add(b5 + 100);
+        assert_string_equal("{21474836580}", r.toString().c_str());
+
+        r.add(1u);
+        r.add(2u);
+        r.add(uint32_max);
+        r.add(uint64_max);
+        assert_string_equal("{1,2,4294967295,21474836580,18446744073709551615}",
+                            r.toString().c_str());
+    }
+}
+
 DEFINE_TEST(test_cpp_remove_run_compression) {
   Roaring r;
   uint32_t max = (std::numeric_limits<uint32_t>::max)();
@@ -1426,6 +1463,7 @@ int main() {
         cmocka_unit_test(issue_336),
         cmocka_unit_test(issue_372),
         cmocka_unit_test(test_cpp_is_subset_64),
+        cmocka_unit_test(test_cpp_to_string),
         cmocka_unit_test(test_cpp_remove_run_compression),
     };
     return cmocka_run_group_tests(tests, NULL, NULL);

From 4dbe48f5437c854f13ab82860e9b7331e13b861b Mon Sep 17 00:00:00 2001
From: Corey Kosak <kosak@users.noreply.github.com>
Date: Thu, 10 Nov 2022 11:24:03 -0500
Subject: [PATCH 021/162] Roaring64Map::select(): remove illegal pointer cast;
 throw exception on unreachable code path. (#400)

* Roaring64Map::select(): remove illegal pointer cast; throw exception on unreachable code path.

* Respond to review feedback: ROARING_TERMINATE

* Respond to review feedback
---
 cpp/roaring64map.hh | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index 0af6f35f7..a1786eaf9 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -898,20 +898,29 @@ public:
     }
 
     /**
-     * If the size of the roaring bitmap is strictly greater than rank, then
-     * this function returns true and set element to the element of given
-     * rank.  Otherwise, it returns false.
+     * Selects the value at index 'rank' in the bitmap, where the smallest value
+     * is at index 0. If 'rank' < cardinality(), returns true with *element set
+     * to the element of the specified rank. Otherwise, returns false and the
+     * contents of *element are unspecified.
      */
-    bool select(uint64_t rnk, uint64_t *element) const {
+    bool select(uint64_t rank, uint64_t *element) const {
         for (const auto &map_entry : roarings) {
-            uint64_t sub_cardinality = (uint64_t)map_entry.second.cardinality();
-            if (rnk < sub_cardinality) {
-                *element = ((uint64_t)map_entry.first) << 32;
-                // assuming little endian
-                return map_entry.second.select((uint32_t)rnk,
-                                               ((uint32_t *)element));
+            auto key = map_entry.first;
+            const auto &bitmap = map_entry.second;
+
+            uint64_t sub_cardinality = bitmap.cardinality();
+            if (rank < sub_cardinality) {
+                uint32_t low_bytes;
+                // Casting rank to uint32_t is safe because
+                // rank < sub_cardinality and sub_cardinality <= 2^32.
+                if (!bitmap.select((uint32_t)rank, &low_bytes)) {
+                    ROARING_TERMINATE("Logic error: bitmap.select() "
+                        "returned false despite rank < cardinality()");
+                }
+                *element = uniteBytes(key, low_bytes);
+                return true;
             }
-            rnk -= sub_cardinality;
+            rank -= sub_cardinality;
         }
         return false;
     }

From aad6512b71ad9803b8bd261bb2a781af93f1dd15 Mon Sep 17 00:00:00 2001
From: Ole Sasse <ole.sasse@databricks.com>
Date: Fri, 11 Nov 2022 15:29:21 +0100
Subject: [PATCH 022/162] Fix a bug in containsRange that triggered an assert
 (#404)

---
 src/roaring.c      |  3 +--
 tests/cpp_unit.cpp | 10 ++++++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/roaring.c b/src/roaring.c
index cc717bb29..7479b4720 100644
--- a/src/roaring.c
+++ b/src/roaring.c
@@ -2880,7 +2880,7 @@ bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_sta
     int32_t is = ra_get_index(&r->high_low_container, hb_rs);
     int32_t ie = ra_get_index(&r->high_low_container, hb_re);
     ie = (ie < 0 ? -ie - 1 : ie);
-    if ((is < 0) || ((ie - is) != span)) {
+    if ((is < 0) || ((ie - is) != span) || ie >= hlc_sz) {
        return false;
     }
     const uint32_t lb_rs = range_start & 0xFFFF;
@@ -2894,7 +2894,6 @@ bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_sta
     if (!container_contains_range(c, lb_rs, 1 << 16, type)) {
       return false;
     }
-    assert(ie < hlc_sz); // would indicate an algorithmic bug
     c = ra_get_container_at_index(&r->high_low_container, ie, &type);
     if (!container_contains_range(c, 0, lb_re, type)) {
         return false;
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index 7ff47a3ce..4f4bf4a5f 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -1413,6 +1413,15 @@ DEFINE_TEST(test_cpp_deserialize_64_key_too_small) {
 }
 #endif
 
+DEFINE_TEST(test_cpp_contains_range_interleaved_containers) {
+    Roaring roaring;
+    // Populate [0xFFFF, 0x20001): last value of container 0 up to the first value of container 2.
+    roaring.addRange(0xFFFF, 0x1FFFF + 2);
+    // Query [0x1FFFF, 0x30001), which would end in container 3. The bitmap has no such
+    // container, so the answer must be false (this used to trip an assert in contains_range).
+    assert_false(roaring.containsRange(0x1FFFF, 0x2FFFF + 2));
+}
+
 int main() {
     roaring::misc::tellmeall();
     const struct CMUnitTest tests[] = {
@@ -1465,6 +1474,7 @@ int main() {
         cmocka_unit_test(test_cpp_is_subset_64),
         cmocka_unit_test(test_cpp_to_string),
         cmocka_unit_test(test_cpp_remove_run_compression),
+        cmocka_unit_test(test_cpp_contains_range_interleaved_containers),
     };
     return cmocka_run_group_tests(tests, NULL, NULL);
 }

From c36081337974f3784e4c4cfecc55ad5c3be80966 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Fri, 11 Nov 2022 09:35:55 -0500
Subject: [PATCH 023/162] Preparing new release

---
 CMakeLists.txt                    | 4 ++--
 include/roaring/roaring_version.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ecd15f912..58ed46dc5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 0)
 set(PROJECT_VERSION_MINOR 7)
-set(PROJECT_VERSION_PATCH 2)
-set(ROARING_LIB_VERSION "0.7.2" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 3)
+set(ROARING_LIB_VERSION "0.7.3" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "5" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 12f856758..128727194 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.7.2"
+#define ROARING_VERSION "0.7.3"
 enum { 
     ROARING_VERSION_MAJOR = 0,
     ROARING_VERSION_MINOR = 7,
-    ROARING_VERSION_REVISION = 2
+    ROARING_VERSION_REVISION = 3
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From b2bbd46c9f1e415e0d97a2796b6f11b1a2bb7503 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 15 Nov 2022 09:01:29 -0500
Subject: [PATCH 024/162] Update roaring64map.hh

---
 cpp/roaring64map.hh | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index a1786eaf9..1654aef53 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -1,6 +1,10 @@
-/*
-A C++ header for 64-bit Roaring Bitmaps, implemented by way of a map of many
-32-bit Roaring Bitmaps.
+/**
+ * A C++ header for 64-bit Roaring Bitmaps, 
+ * implemented by way of a map of many
+ * 32-bit Roaring Bitmaps.
+ * 
+ * Reference (format specification) :
+ * https://github.com/RoaringBitmap/RoaringFormatSpec#extention-for-64-bit-implementations
 */
 #ifndef INCLUDE_ROARING_64_MAP_HH_
 #define INCLUDE_ROARING_64_MAP_HH_

From ff8aca1087e35ff8f46ff4d22e0dd657227ca944 Mon Sep 17 00:00:00 2001
From: Corey Kosak <kosak@users.noreply.github.com>
Date: Tue, 15 Nov 2022 18:23:56 -0500
Subject: [PATCH 025/162] Update flip-type operations of Roaring64Map (#402)

* Update flip-type operations of Roaring64Map

* Create private helper method 'ensureRangePopulated'

* typo

* Respond to review feedback.

* If the caller invokes (half-open interval) flip() with a range that falls
completely into slot 0, delegate to 32-bit flipClosed() rather than
64-bit flipClosed().

* Revert "If the caller invokes (half-open interval) flip() with a range that falls"

This reverts commit 7662b3a79ae83b14dd7f95ec6ecc0a1466a619cb.

* typo
---
 cpp/roaring.hh      |  16 ++-
 cpp/roaring64map.hh | 156 ++++++++++++++++++----
 tests/cpp_unit.cpp  | 313 ++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 428 insertions(+), 57 deletions(-)

diff --git a/cpp/roaring.hh b/cpp/roaring.hh
index 3853ae7b6..c193691ba 100644
--- a/cpp/roaring.hh
+++ b/cpp/roaring.hh
@@ -99,7 +99,7 @@ public:
     }
 
     /**
-     * Construct a bitmap from a list of integer values.
+     * Construct a bitmap from a list of uint32_t values.
      */
     static Roaring bitmapOf(size_t n, ...) {
         Roaring ans;
@@ -345,14 +345,22 @@ public:
     }
 
     /**
-     * Compute the negation of the roaring bitmap within a specified interval.
-     * interval: [range_start, range_end).
-     * Areas outside the range are passed through unchanged.
+     * Compute the negation of the roaring bitmap within the half-open interval
+     * [range_start, range_end). Areas outside the interval are unchanged.
      */
     void flip(uint64_t range_start, uint64_t range_end) {
         api::roaring_bitmap_flip_inplace(&roaring, range_start, range_end);
     }
 
+    /**
+     * Compute the negation of the roaring bitmap within the closed interval
+     * [range_start, range_end]. Areas outside the interval are unchanged.
+     */
+    void flipClosed(uint32_t range_start, uint32_t range_end) {
+        api::roaring_bitmap_flip_inplace(
+            &roaring, range_start, uint64_t(range_end) + 1);
+    }
+
     /**
      * Remove run-length encoding even when it is more space efficient.
      * Return whether a change was applied.
diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index 1654aef53..d9e4ec464 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -85,7 +85,7 @@ public:
      Roaring64Map &operator=(Roaring64Map &&r) noexcept = default;
 
     /**
-     * Construct a bitmap from a list of integer values.
+     * Construct a bitmap from a list of uint64_t values.
      */
     static Roaring64Map bitmapOf(size_t n...) {
         Roaring64Map ans;
@@ -798,39 +798,98 @@ public:
     }
 
     /**
-     * Compute the negation of the roaring bitmap within a specified interval.
-     * areas outside the range are passed through unchanged.
+     * Computes the negation of the roaring bitmap within the half-open interval
+     * [min, max). Areas outside the interval are unchanged.
      */
-    void flip(uint64_t range_start, uint64_t range_end) {
-        if (range_start >= range_end) {
+    void flip(uint64_t min, uint64_t max) {
+        if (min >= max) {
+            return;
+        }
+        flipClosed(min, max - 1);
+    }
+
+    /**
+     * Computes the negation of the roaring bitmap within the closed interval
+     * [min, max]. Areas outside the interval are unchanged.
+     */
+    void flipClosed(uint32_t min, uint32_t max) {
+        auto iter = roarings.begin();
+        // Since min and max are uint32_t, highbytes(min or max) == 0. The inner
+        // bitmap we are looking for, if it exists, will be at the first slot of
+        // 'roarings'. If it does not exist, we have to create it.
+        if (iter == roarings.end() || iter->first != 0) {
+            iter = roarings.emplace_hint(iter, std::piecewise_construct,
+                                         std::forward_as_tuple(0),
+                                         std::forward_as_tuple());
+            auto &bitmap = iter->second;
+            bitmap.setCopyOnWrite(copyOnWrite);
+        }
+        auto &bitmap = iter->second;
+        bitmap.flipClosed(min, max);
+        eraseIfEmpty(iter);
+    }
+
+    /**
+     * Computes the negation of the roaring bitmap within the closed interval
+     * [min, max]. Areas outside the interval are unchanged.
+     */
+    void flipClosed(uint64_t min, uint64_t max) {
+        if (min > max) {
           return;
         }
-        uint32_t start_high = highBytes(range_start);
-        uint32_t start_low = lowBytes(range_start);
-        uint32_t end_high = highBytes(range_end);
-        uint32_t end_low = lowBytes(range_end);
+        uint32_t start_high = highBytes(min);
+        uint32_t start_low = lowBytes(min);
+        uint32_t end_high = highBytes(max);
+        uint32_t end_low = lowBytes(max);
 
+        // We put std::numeric_limits<>::max in parentheses to avoid a
+        // clash with the Windows.h header under Windows.
+        const uint32_t uint32_max = (std::numeric_limits<uint32_t>::max)();
+
+        // Fill in any nonexistent slots with empty Roarings. This simplifies
+        // the logic below, allowing it to simply iterate over the map between
+        // 'start_high' and 'end_high' in a linear fashion.
+        auto current_iter = ensureRangePopulated(start_high, end_high);
+
+        // If start and end land on the same inner bitmap, then we can do the
+        // whole operation in one call.
         if (start_high == end_high) {
-            roarings[start_high].flip(start_low, end_low);
+            auto &bitmap = current_iter->second;
+            bitmap.flipClosed(start_low, end_low);
+            eraseIfEmpty(current_iter);
             return;
         }
-        // we put std::numeric_limits<>::max/min in parentheses
-        // to avoid a clash with the Windows.h header under Windows
-        // flip operates on the range [lower_bound, upper_bound)
-        const uint64_t max_upper_bound =
-            static_cast<uint64_t>((std::numeric_limits<uint32_t>::max)()) + 1;
-        roarings[start_high].flip(start_low, max_upper_bound);
-        roarings[start_high++].setCopyOnWrite(copyOnWrite);
-
-        for (; start_high <= highBytes(range_end) - 1; ++start_high) {
-            roarings[start_high].flip((std::numeric_limits<uint32_t>::min)(),
-                                      max_upper_bound);
-            roarings[start_high].setCopyOnWrite(copyOnWrite);
+
+        // Because start and end don't land on the same inner bitmap,
+        // we need to do this in multiple steps:
+        // 1. Partially flip the first bitmap in the closed interval
+        //    [start_low, uint32_max]
+        // 2. Flip intermediate bitmaps completely: [0, uint32_max]
+        // 3. Partially flip the last bitmap in the closed interval
+        //    [0, end_low]
+
+        auto num_intermediate_bitmaps = end_high - start_high - 1;
+
+        // 1. Partially flip the first bitmap.
+        {
+            auto &bitmap = current_iter->second;
+            bitmap.flipClosed(start_low, uint32_max);
+            auto temp = current_iter++;
+            eraseIfEmpty(temp);
         }
 
-        roarings[start_high].flip((std::numeric_limits<uint32_t>::min)(),
-                                  end_low);
-        roarings[start_high].setCopyOnWrite(copyOnWrite);
+        // 2. Flip intermediate bitmaps completely.
+        for (uint32_t i = 0; i != num_intermediate_bitmaps; ++i) {
+            auto &bitmap = current_iter->second;
+            bitmap.flipClosed(0, uint32_max);
+            auto temp = current_iter++;
+            eraseIfEmpty(temp);
+        }
+
+        // 3. Partially flip the last bitmap.
+        auto &bitmap = current_iter->second;
+        bitmap.flipClosed(0, end_low);
+        eraseIfEmpty(current_iter);
     }
 
     /**
@@ -1336,6 +1395,53 @@ private:
             roarings.erase(iter);
         }
     }
+
+    /**
+     * Ensure that every key in the closed interval [start_high, end_high]
+     * refers to a Roaring bitmap rather than being an empty slot. Inserts empty
+     * Roaring bitmaps if necessary. The interval must be valid and non-empty.
+     * Returns an iterator to the bitmap at start_high.
+     */
+    roarings_t::iterator ensureRangePopulated(uint32_t start_high,
+                                              uint32_t end_high) {
+        if (start_high > end_high) {
+            ROARING_TERMINATE("Logic error: start_high > end_high");
+        }
+        // next_populated_iter points to the first entry in the outer map with
+        // key >= start_high, or end().
+        auto next_populated_iter = roarings.lower_bound(start_high);
+
+        // Use uint64_t to avoid an infinite loop when end_high == uint32_max.
+        roarings_t::iterator start_iter{};  // Definitely assigned in loop.
+        for (uint64_t slot = start_high; slot <= end_high; ++slot) {
+            roarings_t::iterator slot_iter;
+            if (next_populated_iter != roarings.end() &&
+                next_populated_iter->first == slot) {
+                // 'slot' index has caught up to next_populated_iter.
+                // Note it here and advance next_populated_iter.
+                slot_iter = next_populated_iter++;
+            } else {
+                // 'slot' index has not yet caught up to next_populated_iter.
+                // Make a fresh entry {key = 'slot', value = Roaring()}, insert
+                // it just prior to next_populated_iter, and set its copy
+                // on write flag. We take pains to use emplace_hint and
+                // piecewise_construct to minimize effort.
+                slot_iter = roarings.emplace_hint(
+                    next_populated_iter, std::piecewise_construct,
+                    std::forward_as_tuple(uint32_t(slot)),
+                    std::forward_as_tuple());
+                auto &bitmap = slot_iter->second;
+                bitmap.setCopyOnWrite(copyOnWrite);
+            }
+
+            // Make a note of the iterator of the starting slot. It will be
+            // needed for the return value.
+            if (slot == start_high) {
+                start_iter = slot_iter;
+            }
+        }
+        return start_iter;
+    }
 };
 
 /**
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index 4f4bf4a5f..057024fea 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -34,6 +34,14 @@ using roaring::Roaring64Map;  // C++ class extended for 64-bit numbers
 static_assert(std::is_nothrow_move_constructible<Roaring>::value,
               "Expected Roaring to be no except move constructable");
 
+
+namespace {
+// We put std::numeric_limits<>::max in parentheses to avoid a
+// clash with the Windows.h header under Windows.
+const auto uint32_max = (std::numeric_limits<uint32_t>::max)();
+const auto uint64_max = (std::numeric_limits<uint64_t>::max)();
+}  // namespace
+
 bool roaring_iterator_sumall(uint32_t value, void *param) {
     *(uint32_t *)param += value;
     return true;  // we always process all values
@@ -900,8 +908,6 @@ DEFINE_TEST(test_cpp_remove_range_64) {
     Roaring64Map r1;
     auto b5 = uint64_t(5) << 32;
 
-    auto uint64_max = std::numeric_limits<uint64_t>::max();
-
     r1.add(0u);  // 32-bit add
     r1.add(b5 + 1000);  // arbitrary 64 bit add
     r1.add(b5 + 1001);  // arbitrary 64 bit add
@@ -1072,7 +1078,7 @@ DEFINE_TEST(test_cpp_frozen) {
 
     Roaring r1;
     r1.add(0);
-    r1.add(UINT32_MAX);
+    r1.add(uint32_max);
     r1.add(1000);
     r1.add(2000);
     r1.add(100000);
@@ -1153,7 +1159,7 @@ DEFINE_TEST(test_cpp_frozen_64) {
 
     Roaring64Map r1;
     r1.add((uint64_t)0);
-    r1.add((uint64_t)UINT32_MAX);
+    r1.add((uint64_t)uint32_max);
     r1.add((uint64_t)1000);
     r1.add((uint64_t)2000);
     r1.add((uint64_t)100000);
@@ -1212,6 +1218,13 @@ DEFINE_TEST(test_cpp_frozen_64) {
 }
 
 DEFINE_TEST(test_cpp_flip) {
+    {
+        // flipping an empty map works as expected
+        Roaring r1;
+        r1.flip(2, 5);
+        Roaring r2 = Roaring::bitmapOf(3, 2, 3, 4);
+        assert_true(r1 == r2);
+    }
     {
         // nothing is affected outside of the given range
         Roaring r1 = Roaring::bitmapOf(3, 1, 3, 6);
@@ -1235,11 +1248,8 @@ DEFINE_TEST(test_cpp_flip) {
     }
     {
         // uint32 max can be flipped
-        Roaring r1 =
-            Roaring::bitmapOf(1, (std::numeric_limits<uint32_t>::max)());
-        r1.flip(
-            (std::numeric_limits<uint32_t>::max)(),
-            static_cast<uint64_t>((std::numeric_limits<uint32_t>::max)()) + 1);
+        Roaring r1 = Roaring::bitmapOf(1, uint32_max);
+        r1.flip(uint32_max, static_cast<uint64_t>(uint32_max) + 1);
         assert_true(r1.isEmpty());
     }
     {
@@ -1251,32 +1261,221 @@ DEFINE_TEST(test_cpp_flip) {
     }
 }
 
+DEFINE_TEST(test_cpp_flip_closed) {
+    {
+        // flipping an empty map works as expected
+        Roaring r1;
+        r1.flipClosed(2, 5);
+        Roaring r2 = Roaring::bitmapOf(4, 2, 3, 4, 5);
+        assert_true(r1 == r2);
+    }
+    {
+        // nothing is affected outside of the given range
+        Roaring r1 = Roaring::bitmapOf(3, 1, 3, 6);
+        r1.flipClosed(2, 4);
+        Roaring r2 = Roaring::bitmapOf(4, 1, 2, 4, 6);
+        assert_true(r1 == r2);
+    }
+    {
+        // given range can go outside of existing range
+        Roaring r1 = Roaring::bitmapOf(2, 1, 3);
+        r1.flipClosed(0, 4);
+        Roaring r2 = Roaring::bitmapOf(3, 0, 2, 4);
+        assert_true(r1 == r2);
+    }
+    {
+        // range end is inclusive
+        Roaring r1 = Roaring::bitmapOf(2, 1, 3);
+        r1.flipClosed(1, 2);
+        Roaring r2 = Roaring::bitmapOf(2, 2, 3);
+        assert_true(r1 == r2);
+    }
+    {
+        // uint32 max can be flipped
+        Roaring r1 = Roaring::bitmapOf(1, uint32_max);
+        r1.flipClosed(uint32_max, uint32_max);
+        assert_true(r1.isEmpty());
+    }
+    {
+        // empty range does nothing
+        Roaring r1 = Roaring::bitmapOf(2, 2, 3);
+        Roaring r2 = r1;
+        r1.flipClosed(2, 1);
+        assert_true(r1 == r2);
+    }
+}
+
+
 DEFINE_TEST(test_cpp_flip_64) {
+    {
+        // 32-bit test
+        {
+            // flipping an empty map works as expected
+            Roaring64Map r1;
+            r1.flip(2, 5);
+            auto r2 = Roaring64Map::bitmapOf(
+                3, uint64_t(2), uint64_t(3), uint64_t(4));
+            assert_true(r1 == r2);
+        }
+        {
+            // nothing is affected outside of the given range
+            auto r1 = Roaring64Map::bitmapOf(
+                3, uint64_t(1), uint64_t(3), uint64_t(6));
+            r1.flip(uint32_t(2), uint32_t(5));
+            Roaring64Map r2 = Roaring64Map::bitmapOf(
+                4, uint64_t(1), uint64_t(2), uint64_t(4), uint64_t(6));
+            assert_true(r1 == r2);
+        }
+        {
+            // given range can go outside of existing range
+            auto r1 = Roaring64Map::bitmapOf(2, uint64_t(1), uint64_t(3));
+            r1.flip(uint32_t(0), uint32_t(5));
+            auto r2 = Roaring64Map::bitmapOf(
+                3, uint64_t(0), uint64_t(2), uint64_t(4));
+            assert_true(r1 == r2);
+        }
+        {
+            // range end is exclusive
+            auto r1 = Roaring64Map::bitmapOf(2, uint64_t(1), uint64_t(3));
+            r1.flip(uint32_t(1), uint32_t(3));
+            auto r2 = Roaring64Map::bitmapOf(2, uint64_t(2), uint64_t(3));
+            assert_true(r1 == r2);
+        }
+        {
+            // uint32 max can be flipped
+            auto r1 = Roaring64Map::bitmapOf(1, uint64_t(uint32_max));
+            r1.flip(uint32_max, uint64_t(uint32_max) + 1);
+            assert_true(r1.isEmpty());
+        }
+        {
+            // empty range does nothing
+            auto r1 = Roaring64Map::bitmapOf(2, uint64_t(2), uint64_t(3));
+            auto r2 = r1;
+            r1.flip(uint32_t(2), uint32_t(2));
+            assert_true(r1 == r2);
+        }
+    }
+
+    const auto b1 = uint64_t(1) << 32;
+    const auto b2 = uint64_t(2) << 32;
+
     {
         // nothing is affected outside of the given range
-        Roaring64Map r1 = Roaring64Map::bitmapOf(3, (((uint64_t)1) << 32) - 3, ((uint64_t)1) << 32,
-                                                 (((uint64_t)1) << 32) + 3);
-        r1.flip((((uint64_t)1) << 32) - 2, (((uint64_t)1) << 32) + 2);
+        Roaring64Map r1 = Roaring64Map::bitmapOf(3, b1 - 3, b1, b1 + 3);
+        r1.flip(b1 - 2, b1 + 2);
         Roaring64Map r2 = Roaring64Map::bitmapOf(
-            5, (((uint64_t)1) << 32) - 3, (((uint64_t)1) << 32) - 2, (((uint64_t)1) << 32) - 1,
-            (((uint64_t)1) << 32) + 1, (((uint64_t)1) << 32) + 3);
+            5, b1 - 3, b1 - 2, b1 - 1, b1 + 1, b1 + 3);
         assert_true(r1 == r2);
     }
     {
         // given range can go outside of existing range
-        Roaring64Map r1 = Roaring64Map::bitmapOf(2, (((uint64_t)1) << 32) - 2, ((uint64_t)1) << 32);
-        r1.flip((((uint64_t)1) << 32) - 3, (((uint64_t)1) << 32) + 2);
+        Roaring64Map r1 = Roaring64Map::bitmapOf(2, b1 - 2, b1);
+        r1.flip(b1 - 3, b1 + 2);
         Roaring64Map r2 = Roaring64Map::bitmapOf(
-            3, (((uint64_t)1) << 32) - 3, (((uint64_t)1) << 32) - 1, (((uint64_t)1) << 32) + 1);
+            3, b1 - 3, b1 - 1, b1 + 1);
         assert_true(r1 == r2);
     }
     {
         // range end is exclusive
+        Roaring64Map r1 = Roaring64Map::bitmapOf(2, b2 - 1, b2 + 2);
+        r1.flip(b2 - 1, b2 + 2);
+        Roaring64Map r2;
+        for (uint64_t i = b2; i <= b2 + 2; ++i) {
+            r2.add(i);
+        }
+        assert_true(r1 == r2);
+    }
+    {
+        // uint32 max can be flipped
         Roaring64Map r1 =
-            Roaring64Map::bitmapOf(2, (((uint64_t)2) << 32) - 1, (((uint64_t)2) << 32) + 2);
-        r1.flip((((uint64_t)2) << 32) - 1, (((uint64_t)2) << 32) + 2);
+            Roaring64Map::bitmapOf(1, static_cast<uint64_t>(uint32_max));
+        r1.flip(uint32_max, static_cast<uint64_t>(uint32_max) + 1);
+        assert_true(r1.isEmpty());
+    }
+    {
+        // empty range does nothing
+        Roaring64Map r1 = Roaring64Map::bitmapOf(2, b1 - 1, b1);
+        Roaring64Map r2 = r1;
+        r1.flip(b1 - 1, b1 - 1);
+        assert_true(r1 == r2);
+    }
+}
+
+DEFINE_TEST(test_cpp_flip_closed_64) {
+    {
+        // 32-bit test
+        {
+            // flipping an empty map works as expected
+            Roaring64Map r1;
+            r1.flipClosed(uint32_t(2), uint32_t(5));
+            auto r2 = Roaring64Map::bitmapOf(
+                4, uint64_t(2), uint64_t(3), uint64_t(4), uint64_t(5));
+            assert_true(r1 == r2);
+        }
+        {
+            // nothing is affected outside of the given range
+            auto r1 = Roaring64Map::bitmapOf(
+                3, uint64_t(1), uint64_t(3), uint64_t(6));
+            r1.flipClosed(uint32_t(2), uint32_t(4));
+            Roaring64Map r2 = Roaring64Map::bitmapOf(
+                4, uint64_t(1), uint64_t(2), uint64_t(4), uint64_t(6));
+            assert_true(r1 == r2);
+        }
+        {
+            // given range can go outside of existing range
+            auto r1 = Roaring64Map::bitmapOf(2, uint64_t(1), uint64_t(3));
+            r1.flipClosed(uint32_t(0), uint32_t(4));
+            auto r2 = Roaring64Map::bitmapOf(
+                3, uint64_t(0), uint64_t(2), uint64_t(4));
+            assert_true(r1 == r2);
+        }
+        {
+            // range end is inclusive
+            auto r1 = Roaring64Map::bitmapOf(2, uint64_t(1), uint64_t(3));
+            r1.flipClosed(uint32_t(1), uint32_t(2));
+            auto r2 = Roaring64Map::bitmapOf(2, uint64_t(2), uint64_t(3));
+            assert_true(r1 == r2);
+        }
+        {
+            // uint32 max can be flipped
+            auto r1 = Roaring64Map::bitmapOf(1, uint64_t(uint32_max));
+            r1.flipClosed(uint32_max, uint32_max);
+            assert_true(r1.isEmpty());
+        }
+        {
+            // empty range does nothing
+            auto r1 = Roaring64Map::bitmapOf(2, uint64_t(2), uint64_t(3));
+            auto r2 = r1;
+            r1.flipClosed(uint32_t(2), uint32_t(1));
+            assert_true(r1 == r2);
+        }
+    }
+
+    const auto b1 = uint64_t(1) << 32;
+    const auto b2 = uint64_t(2) << 32;
+
+    {
+        // nothing is affected outside of the given range
+        Roaring64Map r1 = Roaring64Map::bitmapOf(3, b1 - 3, b1, b1 + 3);
+        r1.flipClosed(b1 - 2, b1 + 1);
+        Roaring64Map r2 = Roaring64Map::bitmapOf(
+            5, b1 - 3, b1 - 2, b1 - 1, b1 + 1, b1 + 3);
+        assert_true(r1 == r2);
+    }
+    {
+        // given range can go outside of existing range
+        Roaring64Map r1 = Roaring64Map::bitmapOf(2, b1 - 2, b1);
+        r1.flipClosed(b1 - 3, b1 + 1);
+        Roaring64Map r2 = Roaring64Map::bitmapOf(
+            3, b1 - 3, b1 - 1, b1 + 1);
+        assert_true(r1 == r2);
+    }
+    {
+        // range end is inclusive
+        Roaring64Map r1 = Roaring64Map::bitmapOf(2, b2 - 1, b2 + 2);
+        r1.flipClosed(b2 - 1, b2 + 1);
         Roaring64Map r2;
-        for (uint64_t i = (((uint64_t)2) << 32); i <= (((uint64_t)2) << 32) + 2; ++i) {
+        for (uint64_t i = b2; i <= b2 + 2; ++i) {
             r2.add(i);
         }
         assert_true(r1 == r2);
@@ -1284,21 +1483,78 @@ DEFINE_TEST(test_cpp_flip_64) {
     {
         // uint32 max can be flipped
         Roaring64Map r1 =
-            Roaring64Map::bitmapOf(1, static_cast<uint64_t>((std::numeric_limits<uint32_t>::max)()));
-        r1.flip(
-            (std::numeric_limits<uint32_t>::max)(),
-            static_cast<uint64_t>((std::numeric_limits<uint32_t>::max)()) + 1);
+            Roaring64Map::bitmapOf(1, static_cast<uint64_t>(uint32_max));
+        r1.flipClosed(uint32_max, uint32_max);
         assert_true(r1.isEmpty());
     }
     {
         // empty range does nothing
-        Roaring64Map r1 = Roaring64Map::bitmapOf(2, (((uint64_t)1) << 32) - 1, ((uint64_t)1) << 32);
+        Roaring64Map r1 = Roaring64Map::bitmapOf(2, b1 - 1, b1);
         Roaring64Map r2 = r1;
-        r1.flip((((uint64_t)1) << 32) - 1, (((uint64_t)1) << 32) - 1);
+        r1.flipClosed(b1 - 1, b1 - 2);
         assert_true(r1 == r2);
     }
 }
 
+DEFINE_TEST(test_combinatoric_flip_many_64) {
+    // Given 'num_slots_to_test' outer slots, we repeatedly seed a Roaring64Map
+    // with all combinations of present and absent outer slots (basically the
+    // powerset of {0...num_slots_to_test - 1}), then we call 'flipClosed'
+    // and see if the cardinality is what we expect.
+    //
+    // For example (assuming num_slots_to_test = 5), the iterations of the outer
+    // loop represent these sets:
+    // 1. {}
+    // 2. {0}
+    // 3. {1}
+    // 4. {0, 1}
+    // 5. {2}
+    // 6. {0, 2}
+    // 7. {1, 2}
+    // 8. {0, 1, 2}
+    // 9. {3}
+    // and so forth...
+    //
+    // For example, in step 6 (representing set {0, 2}) we set a bit somewhere
+    // in slot 0 and we set another bit somewhere in slot 2. The purpose of this
+    // is to make sure 'flipClosed' does the right thing when it encounters
+    // an arbitrary mix of present and absent slots. Then we call
+    // 'flipClosed' over the whole range and confirm that the cardinality
+    // is what we expect.
+    const uint32_t num_slots_to_test = 5;
+    const uint32_t base_slot = 50;
+
+    const uint32_t bitmask_limit = 1 << num_slots_to_test;
+
+    for (uint32_t bitmask = 0; bitmask < bitmask_limit; ++bitmask) {
+        Roaring64Map roaring;
+        uint32_t num_one_bits = 0;
+
+        // The 1-bits in 'bitmask' indicate which slots we want to seed
+        // with a value.
+        for (uint32_t bit_index = 0; bit_index < num_slots_to_test; ++bit_index) {
+            if ((bitmask & (1 << bit_index)) == 0) {
+                continue;
+            }
+            auto slot = base_slot + bit_index;
+            auto value = (uint64_t(slot) << 32) + 0x1234567 + bit_index;
+            roaring.add(value);
+            ++num_one_bits;
+        }
+
+        auto first_bucket = uint64_t(base_slot) << 32;
+        auto last_bucket = uint64_t(base_slot + num_slots_to_test - 1) << 32;
+
+        roaring.flipClosed(first_bucket, last_bucket + uint32_max);
+
+        // Slots not initialized with a bit will now have cardinality 2^32
+        // Slots initialized with a bit will have cardinality 2^32 - 1
+        auto expected_cardinality = num_slots_to_test * (uint64_t(1) << 32)
+          - num_one_bits;
+        assert_int_equal(expected_cardinality, roaring.cardinality());
+    }
+}
+
 DEFINE_TEST(test_cpp_is_subset_64) {
   Roaring64Map r1 = Roaring64Map::bitmapOf(1, uint64_t(1));
   Roaring64Map r2 = Roaring64Map::bitmapOf(1, uint64_t(1) << 32);
@@ -1310,8 +1566,6 @@ DEFINE_TEST(test_cpp_is_subset_64) {
 DEFINE_TEST(test_cpp_to_string) {
     // test toString
     const auto b5 = uint64_t(5) << 32;
-    const auto uint32_max = std::numeric_limits<uint32_t>::max();
-    const auto uint64_max = std::numeric_limits<uint64_t>::max();
 
     {
         // 32-bit test.
@@ -1456,7 +1710,10 @@ int main() {
         cmocka_unit_test(test_cpp_frozen),
         cmocka_unit_test(test_cpp_frozen_64),
         cmocka_unit_test(test_cpp_flip),
+        cmocka_unit_test(test_cpp_flip_closed),
         cmocka_unit_test(test_cpp_flip_64),
+        cmocka_unit_test(test_cpp_flip_closed_64),
+        cmocka_unit_test(test_combinatoric_flip_many_64),
         cmocka_unit_test(test_cpp_deserialize_64_empty),
         cmocka_unit_test(test_cpp_deserialize_64_32bit_vals),
         cmocka_unit_test(test_cpp_deserialize_64_spread_vals),

From cd5033b7c298a25fb961eb462a4d81229d8003b1 Mon Sep 17 00:00:00 2001
From: Corey Kosak <kosak@users.noreply.github.com>
Date: Tue, 15 Nov 2022 18:24:16 -0500
Subject: [PATCH 026/162] Improve add-type operations (#397)

* Improve add-type operations

* Rewrite in 'ensureRangePopulated' style.

* Respond to review feedback

* typo
---
 cpp/roaring64map.hh           | 185 +++++++++++++++++++++++++++-------
 tests/cpp_unit.cpp            | 169 ++++++++++++++++++++++++++++++-
 tests/roaring64map_checked.hh |   7 +-
 3 files changed, 315 insertions(+), 46 deletions(-)

diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index d9e4ec464..76df3eda8 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -99,34 +99,39 @@ public:
     }
 
     /**
-     * Add value x
+     * Adds value x.
      */
     void add(uint32_t x) {
-        roarings[0].add(x);
-        roarings[0].setCopyOnWrite(copyOnWrite);
+        lookupOrCreateInner(0).add(x);
     }
+
+    /**
+     * Adds value x.
+     */
     void add(uint64_t x) {
-        roarings[highBytes(x)].add(lowBytes(x));
-        roarings[highBytes(x)].setCopyOnWrite(copyOnWrite);
+        lookupOrCreateInner(highBytes(x)).add(lowBytes(x));
     }
 
     /**
-     * Add value x
-     * Returns true if a new value was added, false if the value was already existing.
+     * Adds value x.
+     * Returns true if a new value was added, false if the value was already
+     * present.
      */
     bool addChecked(uint32_t x) {
-        bool result = roarings[0].addChecked(x);
-        roarings[0].setCopyOnWrite(copyOnWrite);
-        return result;
+        return lookupOrCreateInner(0).addChecked(x);
     }
+
+    /**
+     * Adds value x.
+     * Returns true if a new value was added, false if the value was already
+     * present.
+     */
     bool addChecked(uint64_t x) {
-        bool result = roarings[highBytes(x)].addChecked(lowBytes(x));
-        roarings[highBytes(x)].setCopyOnWrite(copyOnWrite);
-        return result;
+        return lookupOrCreateInner(highBytes(x)).addChecked(lowBytes(x));
     }
 
     /**
-     * Add all values in range [min, max)
+     * Adds all values in the half-open interval [min, max).
      */
     void addRange(uint64_t min, uint64_t max) {
         if (min >= max) {
@@ -136,11 +141,15 @@ public:
     }
 
     /**
-     * Add all values in range [min, max]
+     * Adds all values in the closed interval [min, max].
      */
     void addRangeClosed(uint32_t min, uint32_t max) {
-        roarings[0].addRangeClosed(min, max);
+        lookupOrCreateInner(0).addRangeClosed(min, max);
     }
+
+    /**
+     * Adds all values in the closed interval [min, max]
+     */
     void addRangeClosed(uint64_t min, uint64_t max) {
         if (min > max) {
             return;
@@ -149,41 +158,83 @@ public:
         uint32_t start_low = lowBytes(min);
         uint32_t end_high = highBytes(max);
         uint32_t end_low = lowBytes(max);
+
+        // We put std::numeric_limits<>::max in parentheses to avoid a
+        // clash with the Windows.h header under Windows.
+        const uint32_t uint32_max = (std::numeric_limits<uint32_t>::max)();
+
+        // Fill in any nonexistent slots with empty Roarings. This simplifies
+        // the logic below, allowing it to simply iterate over the map between
+        // 'start_high' and 'end_high' in a linear fashion.
+        auto current_iter = ensureRangePopulated(start_high, end_high);
+
+        // If start and end land on the same inner bitmap, then we can do the
+        // whole operation in one call.
         if (start_high == end_high) {
-            roarings[start_high].addRangeClosed(start_low, end_low);
-            roarings[start_high].setCopyOnWrite(copyOnWrite);
+            auto &bitmap = current_iter->second;
+            bitmap.addRangeClosed(start_low, end_low);
             return;
         }
-        // we put std::numeric_limits<>::max/min in parenthesis to avoid a clash
-        // with the Windows.h header under Windows
-        roarings[start_high].addRangeClosed(
-            start_low, (std::numeric_limits<uint32_t>::max)());
-        roarings[start_high].setCopyOnWrite(copyOnWrite);
-        start_high++;
-        for (; start_high < end_high; ++start_high) {
-            roarings[start_high].addRangeClosed(
-                (std::numeric_limits<uint32_t>::min)(),
-                (std::numeric_limits<uint32_t>::max)());
-            roarings[start_high].setCopyOnWrite(copyOnWrite);
+
+        // Because start and end don't land on the same inner bitmap,
+        // we need to do this in multiple steps:
+        // 1. Partially fill the first bitmap with values from the closed
+        //    interval [start_low, uint32_max]
+        // 2. Fill intermediate bitmaps completely: [0, uint32_max]
+        // 3. Partially fill the last bitmap with values from the closed
+        //    interval [0, end_low]
+        auto num_intermediate_bitmaps = end_high - start_high - 1;
+
+        // Step 1: Partially fill the first bitmap.
+        {
+            auto &bitmap = current_iter->second;
+            bitmap.addRangeClosed(start_low, uint32_max);
+            ++current_iter;
+        }
+
+        // Step 2. Fill intermediate bitmaps completely.
+        if (num_intermediate_bitmaps != 0) {
+            auto &first_intermediate = current_iter->second;
+            first_intermediate.addRangeClosed(0, uint32_max);
+            ++current_iter;
+
+            // Now make (num_intermediate_bitmaps - 1) copies of this.
+            for (uint32_t i = 1; i != num_intermediate_bitmaps; ++i) {
+                auto &next_intermediate = current_iter->second;
+                next_intermediate = first_intermediate;
+                ++current_iter;
+            }
         }
-        roarings[end_high].addRangeClosed(
-            (std::numeric_limits<uint32_t>::min)(), end_low);
-        roarings[end_high].setCopyOnWrite(copyOnWrite);
+
+        // Step 3: Partially fill the last bitmap.
+        auto &bitmap = current_iter->second;
+        bitmap.addRangeClosed(0, end_low);
     }
 
     /**
-     * Add value n_args from pointer vals
+     * Adds 'n_args' values from the contiguous memory range starting at 'vals'.
      */
     void addMany(size_t n_args, const uint32_t *vals) {
-        Roaring &roaring = roarings[0];
-        roaring.addMany(n_args, vals);
-        roaring.setCopyOnWrite(copyOnWrite);
+        lookupOrCreateInner(0).addMany(n_args, vals);
     }
 
+    /**
+     * Adds 'n_args' values from the contiguous memory range starting at 'vals'.
+     */
     void addMany(size_t n_args, const uint64_t *vals) {
+        // Potentially reduce outer map lookups by optimistically
+        // assuming that adjacent values will belong to the same inner bitmap.
+        Roaring *last_inner_bitmap = nullptr;
+        uint32_t last_value_high = 0;
         for (size_t lcv = 0; lcv < n_args; lcv++) {
-            roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
-            roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
+            auto value = vals[lcv];
+            auto value_high = highBytes(value);
+            auto value_low = lowBytes(value);
+            if (last_inner_bitmap == nullptr || value_high != last_value_high) {
+                last_inner_bitmap = &lookupOrCreateInner(value_high);
+                last_value_high = value_high;
+            }
+            last_inner_bitmap->add(value_low);
         }
     }
 
@@ -1358,6 +1409,17 @@ private:
 #endif
     }
 
+    /*
+     * Look up 'key' in the 'roarings' map. If it does not exist, create it.
+     * Also, set its copyOnWrite flag to 'copyOnWrite'. Then return a reference
+     * to the (already existing or newly created) inner bitmap.
+     */
+    Roaring &lookupOrCreateInner(uint32_t key) {
+        auto &bitmap = roarings[key];
+        bitmap.setCopyOnWrite(copyOnWrite);
+        return bitmap;
+    }
+
     /**
      * Prints the contents of the bitmap to a caller-provided sink function.
      */
@@ -1385,6 +1447,53 @@ private:
         sink("}");
     }
 
+    /**
+     * Ensures that every key in the closed interval [start_high, end_high]
+     * refers to a Roaring bitmap rather than an empty slot. Inserts empty
+     * Roaring bitmaps if necessary. The interval must be valid and non-empty.
+     * Returns an iterator to the bitmap at start_high.
+     */
+    roarings_t::iterator ensureRangePopulated(uint32_t start_high,
+                                              uint32_t end_high) {
+        if (start_high > end_high) {
+            ROARING_TERMINATE("Logic error: start_high > end_high");
+        }
+        // next_populated_iter points to the first entry in the outer map with
+        // key >= start_high, or end().
+        auto next_populated_iter = roarings.lower_bound(start_high);
+
+        // Use uint64_t to avoid an infinite loop when end_high == uint32_max.
+        roarings_t::iterator start_iter{};  // Definitely assigned in loop.
+        for (uint64_t slot = start_high; slot <= end_high; ++slot) {
+            roarings_t::iterator slot_iter;
+            if (next_populated_iter != roarings.end() &&
+                next_populated_iter->first == slot) {
+                // 'slot' index has caught up to next_populated_iter.
+                // Note it here and advance next_populated_iter.
+                slot_iter = next_populated_iter++;
+            } else {
+                // 'slot' index has not yet caught up to next_populated_iter.
+                // Make a fresh entry {key = 'slot', value = Roaring()}, insert
+                // it just prior to next_populated_iter, and set its copy
+                // on write flag. We take pains to use emplace_hint and
+                // piecewise_construct to minimize effort.
+                slot_iter = roarings.emplace_hint(
+                    next_populated_iter, std::piecewise_construct,
+                    std::forward_as_tuple(uint32_t(slot)),
+                    std::forward_as_tuple());
+                auto &bitmap = slot_iter->second;
+                bitmap.setCopyOnWrite(copyOnWrite);
+            }
+
+            // Make a note of the iterator of the starting slot. It will be
+            // needed for the return value.
+            if (slot == start_high) {
+                start_iter = slot_iter;
+            }
+        }
+        return start_iter;
+    }
+
     /**
      * Erases the entry pointed to by 'iter' from the 'roarings' map. Warning:
      * this invalidates 'iter'.
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index 057024fea..108855470 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -778,7 +778,7 @@ DEFINE_TEST(test_cpp_remove_range) {
     }
 }
 
-DEFINE_TEST(test_cpp_add_range_64) {
+DEFINE_TEST(test_cpp_add_range_closed_64) {
     {
         // 32-bit integers
         Roaring64Map r1;
@@ -789,10 +789,12 @@ DEFINE_TEST(test_cpp_add_range_64) {
         }
         assert_true(r1 == r2);
     }
+    auto b1 = uint64_t(1) << 32;
     std::vector<std::pair<uint64_t, uint64_t>> ranges = {
-        {uint64_t(1) << 32, (uint64_t(1) << 32) + 10},
-        {(uint64_t(1) << 32) - 10, (uint64_t(1) << 32) + 10},
-        {(uint64_t(1) << 32) + 2, (uint64_t(1) << 32) - 2}};
+        {b1, b1 + 10},
+        {b1 + 100, b1 + 100},  // one element
+        {b1 - 10, b1 + 10},
+        {b1 + 2, b1 - 2}};
     for (const auto &range : ranges) {
         uint64_t min = range.first;
         uint64_t max = range.second;
@@ -806,6 +808,157 @@ DEFINE_TEST(test_cpp_add_range_64) {
     }
 }
 
+DEFINE_TEST(test_cpp_add_range_open_64) {
+    {
+        // 32-bit integers
+        Roaring64Map r1;
+        r1.addRange(uint32_t(1), uint32_t(5));
+        Roaring64Map r2;
+        for (uint32_t v = 1; v < 5; ++v) {
+            r2.add(v);
+        }
+        assert_true(r1 == r2);
+    }
+    auto b1 = uint64_t(1) << 32;
+    std::vector<std::pair<uint64_t, uint64_t>> ranges = {
+        {b1, b1 + 10},
+        {b1 - 10, b1 + 10},
+        {b1 + 100, b1 + 100}, // empty
+        {b1 + 2, b1 - 2}};
+    for (const auto &range : ranges) {
+        uint64_t min = range.first;
+        uint64_t max = range.second;
+        Roaring64Map r1;
+        r1.addRange(min, max);
+        Roaring64Map r2;
+        for (uint64_t v = min; v < max; ++v) {
+            r2.add(v);
+        }
+        assert_true(r1 == r2);
+    }
+}
+
+DEFINE_TEST(test_cpp_add_range_closed_large_64) {
+    uint32_t start_high = 300;
+    for (uint32_t end_high = start_high; end_high != 305; ++end_high) {
+        auto begin = (uint64_t(start_high) << 32) + 0x01234567;
+        auto end = (uint64_t(end_high) << 32) + 0x89abcdef;
+        Roaring64Map r1;
+        r1.addRangeClosed(begin, end);
+        auto size = end - begin + 1;
+        assert_true(r1.cardinality() == size);
+    }
+}
+
+DEFINE_TEST(test_cpp_add_range_open_large_64) {
+    uint32_t start_high = 300;
+    for (uint32_t end_high = start_high; end_high != 305; ++end_high) {
+        auto begin = (uint64_t(start_high) << 32) + 0x01234567;
+        auto end = (uint64_t(end_high) << 32) + 0x89abcdef;
+        Roaring64Map r1;
+        r1.addRange(begin, end);
+        auto size = end - begin;
+        assert_true(r1.cardinality() == size);
+    }
+}
+
+DEFINE_TEST(test_cpp_add_many) {
+    std::vector<uint32_t> values = { 9999, 123, 0xFFFFFFFF, 0xFFFFFFF7, 9999};
+    Roaring r1;
+    r1.addMany(values.size(), values.data());
+    Roaring r2;
+    for (const auto value : values) {
+        r2.add(value);
+    }
+    assert_true(r1 == r2);
+}
+
+DEFINE_TEST(test_cpp_add_many_64) {
+    {
+        // 32-bit integers
+        std::vector<uint32_t> values = { 9999, 123, 0xFFFFFFFF, 0xFFFFFFF7, 0, 9999};
+        Roaring64Map r1;
+        r1.addMany(values.size(), values.data());
+        Roaring64Map r2;
+        for (const auto value : values) {
+            r2.add(value);
+        }
+        assert_true(r1 == r2);
+    }
+
+    auto b1 = uint64_t(1) << 32;
+    auto b555 = uint64_t(555) << 32;
+
+    std::vector<uint64_t> values = {
+        b555 + 9999, b1 + 123, b1 + 0xFFFFFFFF, b555 + 0xFFFFFFF7, 0, b555 + 9999};
+    Roaring64Map r1;
+    r1.addMany(values.size(), values.data());
+    Roaring64Map r2;
+    for (const auto value : values) {
+        r2.add(value);
+    }
+    assert_true(r1 == r2);
+}
+
+DEFINE_TEST(test_cpp_add_range_closed_combinatoric_64) {
+    // Given 'num_slots_to_test' outer slots, we repeatedly seed a Roaring64Map
+    // with all combinations of present and absent outer slots (basically the
+    // powerset of {0...num_slots_to_test - 1}), then we add_range_closed
+    // and see if the cardinality is what we expect.
+    //
+    // For example (assuming num_slots_to_test = 5), the iterations of the outer
+    // loop represent these sets:
+    // 1. {}
+    // 2. {0}
+    // 3. {1}
+    // 4. {0, 1}
+    // 5. {2}
+    // 6. {0, 2}
+    // 7. {1, 2}
+    // 8. {0, 1, 2}
+    // 9. {3}
+    // and so forth...
+    //
+    // For example, in step 6 (representing set {0, 2}) we set a bit somewhere
+    // in slot 0 and we set another bit somewhere in slot 2. The purpose of this
+    // is to make sure 'addRangeClosed' does the right thing when it encounters
+    // an arbitrary mix of present and absent slots. Then we call
+    // 'addRangeClosed' over the whole range and confirm that the cardinality
+    // is what we expect.
+    const uint32_t num_slots_to_test = 5;
+    const uint32_t base_slot = 50;
+
+    // We put std::numeric_limits<>::max in parentheses to avoid a
+    // clash with the Windows.h header under Windows.
+    const auto uint32_max = (std::numeric_limits<uint32_t>::max)();
+
+    const uint32_t bitmask_limit = 1 << num_slots_to_test;
+
+    for (uint32_t bitmask = 0; bitmask < bitmask_limit; ++bitmask) {
+        Roaring64Map roaring;
+
+        // The 1-bits in 'bitmask' indicate which slots we want to seed
+        // with a value.
+        for (uint32_t bit_index = 0; bit_index < num_slots_to_test; ++bit_index) {
+            if ((bitmask & (1 << bit_index)) == 0) {
+                continue;
+            }
+            auto slot = base_slot + bit_index;
+            auto value = (uint64_t(slot) << 32) + bit_index;
+            roaring.add(value);
+        }
+
+        auto first_bucket = uint64_t(base_slot) << 32;
+        auto last_bucket = uint64_t(base_slot + num_slots_to_test - 1) << 32;
+
+        roaring.addRangeClosed(first_bucket,
+                               last_bucket + uint32_max);
+
+        auto expected_cardinality = num_slots_to_test * (uint64_t(1) << 32);
+        assert_int_equal(expected_cardinality, roaring.cardinality());
+    }
+}
+
 DEFINE_TEST(test_cpp_remove_range_closed_64) {
     {
         // 32-bit integers
@@ -1690,7 +1843,13 @@ int main() {
         cmocka_unit_test(test_cpp_add_remove_checked_64),
         cmocka_unit_test(test_cpp_add_range),
         cmocka_unit_test(test_cpp_remove_range),
-        cmocka_unit_test(test_cpp_add_range_64),
+        cmocka_unit_test(test_cpp_add_range_closed_64),
+        cmocka_unit_test(test_cpp_add_range_open_64),
+        cmocka_unit_test(test_cpp_add_range_closed_large_64),
+        cmocka_unit_test(test_cpp_add_range_open_large_64),
+        cmocka_unit_test(test_cpp_add_many),
+        cmocka_unit_test(test_cpp_add_many_64),
+        cmocka_unit_test(test_cpp_add_range_closed_combinatoric_64),
         cmocka_unit_test(test_cpp_remove_range_closed_64),
         cmocka_unit_test(test_cpp_remove_range_64),
         cmocka_unit_test(test_run_compression_cpp_64_true),
diff --git a/tests/roaring64map_checked.hh b/tests/roaring64map_checked.hh
index 2a3c97cb7..d31276fa9 100644
--- a/tests/roaring64map_checked.hh
+++ b/tests/roaring64map_checked.hh
@@ -116,9 +116,10 @@ class Roaring64Map {
         return ans;
     }
 
-    void addRange(const uint64_t x, const uint64_t y) {
-        if (x != y) {  // repeat add_range_closed() cast and bounding logic
-            addRangeClosed(x, y - 1);
+    void addRange(const uint64_t min, const uint64_t max) {
+        plain.addRange(min, max);
+        for (uint64_t val = min; val < max; ++val) {
+            check.insert(val);
         }
     }
 

From 5c924b60e4b2be06c96bd4905cd6dcdbc531ff6b Mon Sep 17 00:00:00 2001
From: Corey Kosak <kosak@users.noreply.github.com>
Date: Wed, 16 Nov 2022 08:50:42 -0500
Subject: [PATCH 027/162] Fix build: remove duplicate 'ensureRangePopulated()'
 (#411)

---
 cpp/roaring64map.hh | 47 ---------------------------------------------
 1 file changed, 47 deletions(-)

diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index 76df3eda8..aaea82dc3 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -1504,53 +1504,6 @@ private:
             roarings.erase(iter);
         }
     }
-
-    /**
-     * Ensure that every key in the closed interval [start_high, end_high]
-     * refers to a Roaring bitmap rather being an empty slot. Inserts empty
-     * Roaring bitmaps if necessary. The interval must be valid and non-empty.
-     * Returns an iterator to the bitmap at start_high.
-     */
-    roarings_t::iterator ensureRangePopulated(uint32_t start_high,
-                                              uint32_t end_high) {
-        if (start_high > end_high) {
-            ROARING_TERMINATE("Logic error: start_high > end_high");
-        }
-        // next_populated_iter points to the first entry in the outer map with
-        // key >= start_high, or end().
-        auto next_populated_iter = roarings.lower_bound(start_high);
-
-        // Use uint64_t to avoid an infinite loop when end_high == uint32_max.
-        roarings_t::iterator start_iter{};  // Definitely assigned in loop.
-        for (uint64_t slot = start_high; slot <= end_high; ++slot) {
-            roarings_t::iterator slot_iter;
-            if (next_populated_iter != roarings.end() &&
-                next_populated_iter->first == slot) {
-                // 'slot' index has caught up to next_populated_iter.
-                // Note it here and advance next_populated_iter.
-                slot_iter = next_populated_iter++;
-            } else {
-                // 'slot' index has not yet caught up to next_populated_iter.
-                // Make a fresh entry {key = 'slot', value = Roaring()}, insert
-                // it just prior to next_populated_iter, and set its copy
-                // on write flag. We take pains to use emplace_hint and
-                // piecewise_construct to minimize effort.
-                slot_iter = roarings.emplace_hint(
-                    next_populated_iter, std::piecewise_construct,
-                    std::forward_as_tuple(uint32_t(slot)),
-                    std::forward_as_tuple());
-                auto &bitmap = slot_iter->second;
-                bitmap.setCopyOnWrite(copyOnWrite);
-            }
-
-            // Make a note of the iterator of the starting slot. It will be
-            // needed for the return value.
-            if (slot == start_high) {
-                start_iter = slot_iter;
-            }
-        }
-        return start_iter;
-    }
 };
 
 /**

From 1f95d82042bcdad1d86a04967e8e8d220b6fc809 Mon Sep 17 00:00:00 2001
From: "lgtm-com[bot]" <43144390+lgtm-com[bot]@users.noreply.github.com>
Date: Wed, 16 Nov 2022 08:54:48 -0500
Subject: [PATCH 028/162] Add CodeQL workflow for GitHub code scanning (#403)

Co-authored-by: LGTM Migrator <lgtm-migrator@users.noreply.github.com>
---
 .github/workflows/codeql.yml | 42 ++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 .github/workflows/codeql.yml

diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
new file mode 100644
index 000000000..6d19b4e62
--- /dev/null
+++ b/.github/workflows/codeql.yml
@@ -0,0 +1,42 @@
+name: "CodeQL"
+
+on:
+  push:
+    branches: [ "master" ]
+  pull_request:
+    branches: [ "master" ]
+  schedule:
+    - cron: "39 2 * * 6"
+
+jobs:
+  analyze:
+    name: Analyze
+    runs-on: ubuntu-latest
+    permissions:
+      actions: read
+      contents: read
+      security-events: write
+
+    strategy:
+      fail-fast: false
+      matrix:
+        language: [ cpp, python ]
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Initialize CodeQL
+        uses: github/codeql-action/init@v2
+        with:
+          languages: ${{ matrix.language }}
+          queries: +security-and-quality
+
+      - name: Autobuild
+        uses: github/codeql-action/autobuild@v2
+        if: ${{ matrix.language == 'cpp' || matrix.language == 'python' }}
+
+      - name: Perform CodeQL Analysis
+        uses: github/codeql-action/analyze@v2
+        with:
+          category: "/language:${{ matrix.language }}"

From 827801e2eda53111ecc996af78291a60b80b2099 Mon Sep 17 00:00:00 2001
From: Corey Kosak <kosak@users.noreply.github.com>
Date: Wed, 16 Nov 2022 09:16:48 -0500
Subject: [PATCH 029/162] Provide a more aggressive Roaring64Map::fastunion()
 implementation (#405)

* Provide a more aggressive Roaring64Map::fastunion() implementation

* Initial benchmark implementation

* It looks like this benchmark needs to be inside the "NOT WIN32" clause of the CMakeLists.txt

* Respond to review feedback

* Fix build on github action Ubuntu CI / ubuntu-noexcept-ci.yml
---
 benchmarks/CMakeLists.txt          |   1 +
 benchmarks/fastunion_benchmark.cpp | 101 +++++++++++++++++++++++
 cpp/roaring64map.hh                | 123 +++++++++++++++++++++++++++--
 tests/cpp_unit.cpp                 |  40 ++++++++++
 tools/cmake/FindCTargets.cmake     |  10 +++
 5 files changed, 270 insertions(+), 5 deletions(-)
 create mode 100644 benchmarks/fastunion_benchmark.cpp

diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index 4a3eb0e8e..71be77ee7 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -13,6 +13,7 @@ if(NOT WIN32)
     target_link_libraries(add_benchmark m)
     add_c_benchmark(frozen_benchmark)
     add_c_benchmark(containsmulti_benchmark)
+    add_cpp_benchmark(fastunion_benchmark)
 endif()
 add_c_benchmark(bitset_container_benchmark)
 add_c_benchmark(array_container_benchmark)
diff --git a/benchmarks/fastunion_benchmark.cpp b/benchmarks/fastunion_benchmark.cpp
new file mode 100644
index 000000000..ad693f6b8
--- /dev/null
+++ b/benchmarks/fastunion_benchmark.cpp
@@ -0,0 +1,101 @@
+#include <stdio.h>
+#include <iostream>
+#include <roaring/roaring.h>
+#include "roaring64map.hh"
+#include "benchmark.h"
+
+using roaring::Roaring64Map;
+
+namespace {
+const uint32_t num_iterations = 10;
+
+const uint32_t num_bitmaps = 100;
+const uint32_t num_outer_slots = 1000;
+const uint32_t num_inner_values = 2000;
+
+/**
+ * Creates the input maps for the test. This method creates 'num_bitmaps' maps,
+ * each of which contains 'num_outer_slots' 32-bit Roarings, each of which
+ * contains 'num_inner_values' bits. The inner bits are separated by
+ * 'num_bitmaps' and their starting offset is offset by 1 from one bitmap to the
+ * next. The intent is that in the result of the union, all the bits in a given
+ * 32 bit Roaring slot will end up densely packed together, which seemed like an
+ * interesting thing to do.
+ */
+std::vector<Roaring64Map> makeMaps() {
+    std::vector<Roaring64Map> result;
+    for (uint32_t bm_index = 0; bm_index != num_bitmaps; ++bm_index) {
+        Roaring64Map roaring;
+
+        for (uint32_t slot = 0; slot != num_outer_slots; ++slot) {
+            auto value = (uint64_t(slot) << 32) + bm_index + 0x98765432;
+            for (uint32_t inner_index = 0; inner_index != num_inner_values;
+                 ++inner_index) {
+                roaring.add(value);
+                value += num_bitmaps;
+            }
+        }
+        result.push_back(std::move(roaring));
+    }
+    return result;
+}
+
+Roaring64Map legacy_fastunion(size_t n, const Roaring64Map **inputs) {
+    Roaring64Map ans;
+    // not particularly fast
+    for (size_t lcv = 0; lcv < n; ++lcv) {
+        ans |= *(inputs[lcv]);
+    }
+    return ans;
+}
+
+void benchmarkLegacyFastUnion() {
+    std::cout << "*** Legacy fastunion ***\n";
+    auto maps = makeMaps();
+
+    // Need pointers to the above
+    std::vector<const Roaring64Map*> result_ptrs;
+    for (auto &map : maps) {
+        result_ptrs.push_back(&map);
+    }
+
+    for (uint32_t iter = 0; iter < num_iterations; ++iter) {
+        uint64_t cycles_start, cycles_final;
+        RDTSC_START(cycles_start);
+        auto result = legacy_fastunion(result_ptrs.size(), result_ptrs.data());
+        RDTSC_FINAL(cycles_final);
+
+        auto num_cycles = cycles_final - cycles_start;
+        uint64_t cycles_per_map = num_cycles / maps.size();
+        std::cout << "Iteration " << iter << ": " << cycles_per_map << " per map\n";
+    }
+}
+
+void benchmarkNewFastUnion() {
+    std::cout << "*** New fastunion() ***\n";
+    auto maps = makeMaps();
+
+    // Need pointers to the above
+    std::vector<const Roaring64Map*> result_ptrs;
+    for (auto &map : maps) {
+        result_ptrs.push_back(&map);
+    }
+
+    for (uint32_t iter = 0; iter < num_iterations; ++iter) {
+        uint64_t cycles_start, cycles_final;
+        RDTSC_START(cycles_start);
+        auto result =
+            Roaring64Map::fastunion(result_ptrs.size(), result_ptrs.data());
+        RDTSC_FINAL(cycles_final);
+
+        auto num_cycles = cycles_final - cycles_start;
+        uint64_t cycles_per_map = num_cycles / maps.size();
+        std::cout << "Iteration " << iter << ": " << cycles_per_map << " per map\n";
+    }
+}
+}  // namespace
+
+int main() {
+    benchmarkLegacyFastUnion();
+    benchmarkNewFastUnion();
+}
diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index aaea82dc3..dcc1e800d 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -19,6 +19,7 @@
 #include <map>
 #include <new>
 #include <numeric>
+#include <queue>
 #include <stdexcept>
 #include <string>
 #include <utility>
@@ -1351,12 +1352,124 @@ public:
      * pointer).
      */
     static Roaring64Map fastunion(size_t n, const Roaring64Map **inputs) {
-        Roaring64Map ans;
-        // not particularly fast
-        for (size_t lcv = 0; lcv < n; ++lcv) {
-            ans |= *(inputs[lcv]);
+        // The strategy here is to basically do a "group by" operation.
+        // We group the input roarings by key, do a 32-bit
+        // roaring_bitmap_or_many on each group, and collect the results.
+        // We accomplish the "group by" operation using a priority queue, which
+        // tracks the next key for each of our input maps. At each step, our
+        // algorithm takes the next subset of maps that share the same next key,
+        // runs roaring_bitmap_or_many on those bitmaps, and then advances the
+        // current_iter on all the affected entries and then repeats.
+
+        // There is an entry in our priority queue for each of the 'n' inputs.
+        // For a given Roaring64Map, we look at its underlying 'roarings'
+        // std::map, and take its begin() and end(). This forms our half-open
+        // interval [current_iter, end_iter), which we keep in the priority
+        // queue as a pq_entry. These entries are updated (removed and then
+        // reinserted with the pq_entry.iterator field advanced by one step) as
+        // our algorithm progresses. But when a given interval becomes empty
+        // (i.e. pq_entry.iterator == pq_entry.end) it is not returned to the
+        // priority queue.
+        struct pq_entry {
+            roarings_t::const_iterator iterator;
+            roarings_t::const_iterator end;
+        };
+
+        // Custom comparator for the priority queue.
+        auto pq_comp = [](const pq_entry &lhs, const pq_entry &rhs) {
+            auto left_key = lhs.iterator->first;
+            auto right_key = rhs.iterator->first;
+
+            // We compare in the opposite direction than normal because priority
+            // queues normally order from largest to smallest, but we want
+            // smallest to largest.
+            return left_key > right_key;
+        };
+
+        // Create and populate the priority queue.
+        std::priority_queue<pq_entry, std::vector<pq_entry>, decltype(pq_comp)> pq(pq_comp);
+        for (size_t i = 0; i < n; ++i) {
+            const auto &roarings = inputs[i]->roarings;
+            if (roarings.begin() != roarings.end()) {
+                pq.push({roarings.begin(), roarings.end()});
+            }
         }
-        return ans;
+
+        // A reusable vector that holds the pointers to the inner bitmaps that
+        // we pass to the underlying 32-bit fastunion operation.
+        std::vector<const roaring_bitmap_t*> group_bitmaps;
+
+        // Summary of the algorithm:
+        // 1. While the priority queue is not empty:
+        //    A. Get its lowest key. Call this group_key
+        //    B. While the lowest entry in the priority queue has a key equal to
+        //       group_key:
+        //       1. Remove this entry (the pair {current_iter, end_iter}) from
+        //          the priority queue.
+        //       2. Add the bitmap pointed to by current_iter to a list of
+        //          32-bit bitmaps to process.
+        //       3. Advance current_iter. Now it will point to a bitmap entry
+        //          with some key greater than group_key (or it will point to
+        //          end()).
+        //       4. If current_iter != end_iter, reinsert the pair into the
+        //          priority queue.
+        //    C. Invoke the 32-bit roaring_bitmap_or_many() and add to result
+        Roaring64Map result;
+        while (!pq.empty()) {
+            // Find the next key (the lowest key) in the priority queue.
+            auto group_key = pq.top().iterator->first;
+
+            // The purpose of the inner loop is to gather all the inner bitmaps
+            // that share "group_key" into "group_bitmaps" so that they can be
+            // fed to roaring_bitmap_or_many(). While we are doing this, we
+            // advance those iterators to their next value and reinsert them
+            // into the priority queue (unless they reach their end).
+            group_bitmaps.clear();
+            while (!pq.empty()) {
+                auto candidate_current_iter = pq.top().iterator;
+                auto candidate_end_iter = pq.top().end;
+
+                auto candidate_key = candidate_current_iter->first;
+                const auto &candidate_bitmap = candidate_current_iter->second;
+
+                // This element will either be in the group (having
+                // key == group_key) or it will not be in the group (having
+                // key > group_key). (Note it cannot have key < group_key
+                // because of the ordered nature of the priority queue itself
+                // and the ordered nature of all the underlying roaring maps).
+                if (candidate_key != group_key) {
+                    // This entry, and (thanks to the nature of the priority
+                    // queue) all other entries as well, are all greater than
+                    // group_key, so we're done collecting elements for the
+                    // current group. Because of the way this loop was written,
+                    // the group will always contain at least one element.
+                    break;
+                }
+
+                group_bitmaps.push_back(&candidate_bitmap.roaring);
+                // Remove this entry from the priority queue. Note this
+                // invalidates pq.top() so make sure you don't have any dangling
+                // references to it.
+                pq.pop();
+
+                // Advance 'candidate_current_iter' and insert a new entry
+                // {candidate_current_iter, candidate_end_iter} into the
+                // priority queue (unless it has reached its end).
+                ++candidate_current_iter;
+                if (candidate_current_iter != candidate_end_iter) {
+                    pq.push({candidate_current_iter, candidate_end_iter});
+                }
+            }
+
+            // Use the fast inner union to combine these.
+            auto *inner_result = roaring_bitmap_or_many(group_bitmaps.size(),
+                group_bitmaps.data());
+            // Insert the 32-bit result at end of the 'roarings' map of the
+            // result we are building.
+            result.roarings.insert(result.roarings.end(),
+                std::make_pair(group_key, Roaring(inner_result)));
+        }
+        return result;
     }
 
     friend class Roaring64MapSetBitForwardIterator;
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index 108855470..fef448278 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -1716,6 +1716,45 @@ DEFINE_TEST(test_cpp_is_subset_64) {
   assert_true(r3.isSubset(r2));
 }
 
+DEFINE_TEST(test_cpp_fast_union_64) {
+    auto update = [](Roaring64Map *dest, uint32_t bitmask, uint32_t offset) {
+        for (uint32_t i = 0; i != 32; ++i) {
+            if ((bitmask & (1 << i)) != 0) {
+                dest->add(offset + i);
+            }
+        }
+    };
+
+    // Generate three Roaring64Maps that have a variety of combinations of
+    // present and absent slots and calculate their union with fastunion.
+    const uint32_t num_slots_to_test = 4;
+    const uint32_t bitmask_limit = 1 << num_slots_to_test;
+
+    for (size_t r0_bitmask = 0; r0_bitmask != bitmask_limit; ++r0_bitmask) {
+        for (size_t r1_bitmask = 0; r1_bitmask != bitmask_limit; ++r1_bitmask) {
+            for (size_t r2_bitmask = 0; r2_bitmask != bitmask_limit;
+                 ++r2_bitmask) {
+                Roaring64Map r0_map, r1_map, r2_map;
+                update(&r0_map, r0_bitmask, 0);
+                update(&r1_map, r1_bitmask, 0x1000);
+                update(&r2_map, r2_bitmask, 0x2000);
+
+                const Roaring64Map *maps[] = {
+                    &r0_map, &r1_map, &r2_map
+                };
+                auto actual = Roaring64Map::fastunion(3, maps);
+
+                Roaring64Map expected;
+                update(&expected, r0_bitmask, 0);
+                update(&expected, r1_bitmask, 0x1000);
+                update(&expected, r2_bitmask, 0x2000);
+
+                assert_true(expected == actual);
+            }
+        }
+    }
+}
+
 DEFINE_TEST(test_cpp_to_string) {
     // test toString
     const auto b5 = uint64_t(5) << 32;
@@ -1888,6 +1927,7 @@ int main() {
         cmocka_unit_test(issue_336),
         cmocka_unit_test(issue_372),
         cmocka_unit_test(test_cpp_is_subset_64),
+        cmocka_unit_test(test_cpp_fast_union_64),
         cmocka_unit_test(test_cpp_to_string),
         cmocka_unit_test(test_cpp_remove_run_compression),
         cmocka_unit_test(test_cpp_contains_range_interleaved_containers),
diff --git a/tools/cmake/FindCTargets.cmake b/tools/cmake/FindCTargets.cmake
index 8dae8ffc3..97f2b64d6 100644
--- a/tools/cmake/FindCTargets.cmake
+++ b/tools/cmake/FindCTargets.cmake
@@ -41,3 +41,13 @@ function(add_c_benchmark BENCH_NAME)
   add_executable(${BENCH_NAME} ${BENCH_NAME}.c)
   target_link_libraries(${BENCH_NAME} ${ROARING_LIB_NAME})
 endfunction(add_c_benchmark)
+
+function(add_cpp_benchmark BENCH_NAME)
+  add_executable(${BENCH_NAME} ${BENCH_NAME}.cpp)
+  target_link_libraries(${BENCH_NAME} ${ROARING_LIB_NAME})
+  if(ROARING_EXCEPTIONS)
+    target_compile_definitions(${BENCH_NAME} PUBLIC ROARING_EXCEPTIONS=1)
+  else()
+    target_compile_definitions(${BENCH_NAME} PUBLIC ROARING_EXCEPTIONS=0)
+  endif()
+endfunction(add_cpp_benchmark)

From 9672fe539b2152be19d3ce4b6cd9ff3299853da9 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Wed, 16 Nov 2022 09:19:01 -0500
Subject: [PATCH 030/162] Minor fix.

---
 tests/cpp_unit.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index fef448278..4d4d2dac3 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -928,10 +928,6 @@ DEFINE_TEST(test_cpp_add_range_closed_combinatoric_64) {
     const uint32_t num_slots_to_test = 5;
     const uint32_t base_slot = 50;
 
-    // We put std::numeric_limits<>::max in parentheses to avoid a
-    // clash with the Windows.h header under Windows.
-    const auto uint32_max = (std::numeric_limits<uint32_t>::max)();
-
     const uint32_t bitmask_limit = 1 << num_slots_to_test;
 
     for (uint32_t bitmask = 0; bitmask < bitmask_limit; ++bitmask) {

From 4351a6cf8ee7f0b17bd18386bf43c503fc555d51 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Wed, 16 Nov 2022 17:19:53 -0500
Subject: [PATCH 031/162] Let us guard the malloc.h include with a check for
 glibc. (#412)

---
 include/roaring/portability.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index 3f43e97fe..d16a4daa9 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -49,7 +49,7 @@
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>  // will provide posix_memalign with _POSIX_C_SOURCE as defined above
-#if !(defined(__APPLE__)) && !(defined(__FreeBSD__))
+#ifdef __GLIBC__
 #include <malloc.h>  // this should never be needed but there are some reports that it is needed.
 #endif
 

From 5355686f8df739ca72ab6044b7cb257ad7e0e09c Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 17 Nov 2022 09:23:04 -0500
Subject: [PATCH 032/162] add support for initializer list (#408)

* This PR adds support for initializer lists.

* Adding constructor.

* Portable printf/uint64_t

* Fixing a couple of issues.

* Trimming some 'ull'.

* Fixed spacing.

* A few more fixes.

* Removing the new constructors.

* Fixed typo.

* Correcting typo.
---
 README.md                 |  5 +++++
 cpp/roaring.hh            | 11 +++++++++++
 cpp/roaring64map.hh       | 11 +++++++++++
 tests/cpp_random_unit.cpp | 21 +++++++++++----------
 tests/cpp_unit.cpp        | 30 ++++++++++++++++++++++++++++++
 5 files changed, 68 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 705ddb790..d9689a316 100644
--- a/README.md
+++ b/README.md
@@ -372,6 +372,11 @@ int main() {
     r2.printf();
     printf("\n");
 
+    // create a new bitmap with initializer list
+    Roaring r2i = Roaring::bitmapOfList({1, 2, 3, 5, 6});
+
+    assert(r2i == r2);
+
     // we can also create a bitmap from a pointer to 32-bit integers
     const uint32_t values[] = {2, 3, 4};
     Roaring r3(3, values);
diff --git a/cpp/roaring.hh b/cpp/roaring.hh
index c193691ba..945a862b6 100644
--- a/cpp/roaring.hh
+++ b/cpp/roaring.hh
@@ -7,6 +7,7 @@ A C++ header for Roaring Bitmaps.
 #include <cstdarg>
 
 #include <algorithm>
+#include <initializer_list>
 #include <new>
 #include <stdexcept>
 #include <string>
@@ -112,6 +113,16 @@ public:
         return ans;
     }
 
+    /**
+     * Construct a bitmap from a list of uint32_t values.
+     * E.g., bitmapOfList({1,2,3}).
+     */
+    static Roaring bitmapOfList(std::initializer_list<uint32_t> l) {
+        Roaring ans;
+        ans.addMany(l.size(), l.begin());
+        return ans;
+    }
+
     /**
      * Add value x
      */
diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index dcc1e800d..e0416ba2c 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -15,6 +15,7 @@
 #include <cstdio>  // for std::printf() in the printf() method
 #include <cstring>  // for std::memcpy()
 #include <functional>
+#include <initializer_list>
 #include <limits>
 #include <map>
 #include <new>
@@ -99,6 +100,16 @@ public:
         return ans;
     }
 
+    /**
+     * Construct a bitmap from a list of uint64_t values.
+     * E.g., bitmapOfList({1,2,3}).
+     */
+    static Roaring64Map bitmapOfList(std::initializer_list<uint64_t> l) {
+        Roaring64Map ans;
+        ans.addMany(l.size(), l.begin());
+        return ans;
+    }
+
     /**
      * Adds value x.
      */
diff --git a/tests/cpp_random_unit.cpp b/tests/cpp_random_unit.cpp
index 0cc29c6e1..a9da75ff7 100644
--- a/tests/cpp_random_unit.cpp
+++ b/tests/cpp_random_unit.cpp
@@ -20,11 +20,12 @@
 // https://www.llvm.org/docs/LibFuzzer.html
 //
 
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
+#include <cassert>
+#include <cinttypes>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <ctime>
 
 #include <iostream>
 #include <type_traits>
@@ -378,10 +379,10 @@ DEFINE_TEST(random_doublecheck_test_64) {
         const Roaring64Map &right = roars[rand() % NUM_ROARS];
 
 #ifdef ROARING_CPP_RANDOM_PRINT_STATUS
-        printf("[%lu]: %llu %llu %llu\n", step,
-               static_cast<unsigned long long>(left.cardinality()),
-               static_cast<unsigned long long>(right.cardinality()),
-               static_cast<unsigned long long>(out.cardinality()));
+        printf("[%lu]: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", step,
+               left.cardinality(),
+               right.cardinality(),
+               out.cardinality());
 #endif
 
         int op = rand() % 6;
@@ -482,7 +483,7 @@ DEFINE_TEST(random_doublecheck_test_64) {
 int main() {
     uint64_t seed = time(nullptr);
     srand(seed);
-    printf("Seed: %lu\n", seed);
+    printf("Seed:  %" PRIu64 "\n", seed);
 
     gravity = rand() % 10000;  // starting focal point
 
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index 4d4d2dac3..14948d664 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -333,6 +333,10 @@ void test_example_cpp(bool copy_on_write) {
 
     r2.printf();
     printf("\n");
+    // create a new bitmap with initializer list
+    Roaring r2i = Roaring::bitmapOfList({1, 2, 3, 5, 6});
+
+    assert(r2i == r2);
 
     // test select
     uint32_t element;
@@ -532,6 +536,11 @@ void test_example_cpp_64(bool copy_on_write) {
 
     r2.printf();
     printf("\n");
+    // create a new bitmap with initializer list
+    Roaring64Map r2i =
+        Roaring64Map::bitmapOfList({1, 2, 234294967296, 195839473298,
+                               14000000000000000100ull});
+    assert(r2i == r2);
 
     // test select
     uint64_t element;
@@ -807,6 +816,25 @@ DEFINE_TEST(test_cpp_add_range_closed_64) {
         assert_true(r1 == r2);
     }
 }
+DEFINE_TEST(test_bitmap_of_32) {
+        Roaring r1 = Roaring::bitmapOfList({1,2,4});
+        r1.printf();
+        printf("\n");
+        Roaring r2 =
+            Roaring::bitmapOf(3, 1, 2, 4);
+        r2.printf();
+        printf("\n");
+        assert_true(r1 == r2);
+}
+
+DEFINE_TEST(test_bitmap_of_64) {
+        Roaring64Map r1 = Roaring64Map::bitmapOfList({1,2,4});
+        r1.printf();
+        Roaring64Map r2 =
+            Roaring64Map::bitmapOf(3, uint64_t(1), uint64_t(2), uint64_t(4));
+        r2.printf();
+        assert_true(r1 == r2);
+}
 
 DEFINE_TEST(test_cpp_add_range_open_64) {
     {
@@ -1867,6 +1895,8 @@ DEFINE_TEST(test_cpp_contains_range_interleaved_containers) {
 int main() {
     roaring::misc::tellmeall();
     const struct CMUnitTest tests[] = {
+        cmocka_unit_test(test_bitmap_of_32),
+        cmocka_unit_test(test_bitmap_of_64),
         cmocka_unit_test(serial_test),
         cmocka_unit_test(test_example_true),
         cmocka_unit_test(test_example_false),

From 19d9486f4bd912dd9063e4ae4f297ffa115231e7 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 17 Nov 2022 09:28:10 -0500
Subject: [PATCH 033/162] Preparing new release

---
 CMakeLists.txt                    | 8 ++++----
 include/roaring/roaring_version.h | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 58ed46dc5..0c779b62a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,10 +17,10 @@ if(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSIO
 endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 0)
-set(PROJECT_VERSION_MINOR 7)
-set(PROJECT_VERSION_PATCH 3)
-set(ROARING_LIB_VERSION "0.7.3" CACHE STRING "Roaring library version")
-set(ROARING_LIB_SOVERSION "5" CACHE STRING "Roaring library soversion")
+set(PROJECT_VERSION_MINOR 8)
+set(PROJECT_VERSION_PATCH 0)
+set(ROARING_LIB_VERSION "0.8.0" CACHE STRING "Roaring library version")
+set(ROARING_LIB_SOVERSION "6" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
 if(NOT ROARING_EXCEPTIONS)
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 128727194..ff5a9f5df 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.7.3"
+#define ROARING_VERSION "0.8.0"
 enum { 
     ROARING_VERSION_MAJOR = 0,
-    ROARING_VERSION_MINOR = 7,
-    ROARING_VERSION_REVISION = 3
+    ROARING_VERSION_MINOR = 8,
+    ROARING_VERSION_REVISION = 0
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 6ef51dcd2a555d521caa46459df06015f17cdf6d Mon Sep 17 00:00:00 2001
From: Paul Smith <paul@mad-scientist.net>
Date: Thu, 17 Nov 2022 14:43:33 -0500
Subject: [PATCH 034/162] convert.c: Remove set-but-not-used variable (Clang 15
 warning) (#413)

---
 src/containers/convert.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/containers/convert.c b/src/containers/convert.c
index a87babff0..300a1c0a8 100644
--- a/src/containers/convert.c
+++ b/src/containers/convert.c
@@ -263,7 +263,6 @@ container_t *convert_run_optimize(
 
         int long_ctr = 0;
         uint64_t cur_word = c_qua_bitset->words[0];
-        int run_count = 0;
         while (true) {
             while (cur_word == UINT64_C(0) &&
                    long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
@@ -294,7 +293,6 @@ container_t *convert_run_optimize(
             int local_run_end = __builtin_ctzll(~cur_word_with_1s);
             run_end = local_run_end + long_ctr * 64;
             add_run(answer, run_start, run_end - 1);
-            run_count++;
             cur_word = cur_word_with_1s & (cur_word_with_1s + 1);
         }
         return answer;

From 23492ef06a189883b3b2d4d36711ab56526986b4 Mon Sep 17 00:00:00 2001
From: DavidKorczynski <david@adalogics.com>
Date: Thu, 17 Nov 2022 22:54:46 +0000
Subject: [PATCH 035/162] Add CIFuzz to Github workflows (#414)

Signed-off-by: David Korczynski <david@adalogics.com>

Signed-off-by: David Korczynski <david@adalogics.com>
---
 .github/workflows/cifuzz.yml | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 .github/workflows/cifuzz.yml

diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml
new file mode 100644
index 000000000..94bfd479d
--- /dev/null
+++ b/.github/workflows/cifuzz.yml
@@ -0,0 +1,24 @@
+name: CIFuzz
+on: [pull_request]
+jobs:
+  Fuzzing:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Build Fuzzers
+      id: build
+      uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
+      with:
+        oss-fuzz-project-name: 'croaring'
+        dry-run: false
+    - name: Run Fuzzers
+      uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
+      with:
+        oss-fuzz-project-name: 'croaring'
+        fuzz-seconds: 300
+        dry-run: false
+    - name: Upload Crash
+      uses: actions/upload-artifact@v3
+      if: failure() && steps.build.outcome == 'success'
+      with:
+        name: artifacts
+        path: ./out/artifacts

From a644f44898d3c0f27af4406827bbde5f42a06b53 Mon Sep 17 00:00:00 2001
From: Corey Kosak <kosak@users.noreply.github.com>
Date: Tue, 22 Nov 2022 11:34:28 -0500
Subject: [PATCH 036/162] Add initializer_list constructor and assignment
 operator to Roaring and Roaring64Map (#415)

---
 cpp/roaring.hh      | 22 +++++++++++++++---
 cpp/roaring64map.hh | 18 ++++++++++++++-
 tests/cpp_unit.cpp  | 54 ++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 84 insertions(+), 10 deletions(-)

diff --git a/cpp/roaring.hh b/cpp/roaring.hh
index 945a862b6..f02f12400 100644
--- a/cpp/roaring.hh
+++ b/cpp/roaring.hh
@@ -56,12 +56,19 @@ public:
     }
 
     /**
-     * Construct a bitmap from a list of integer values.
+     * Construct a bitmap from a list of 32-bit integer values.
      */
     Roaring(size_t n, const uint32_t *data) : Roaring() {
         api::roaring_bitmap_add_many(&roaring, n, data);
     }
 
+    /**
+     * Construct a bitmap from an initializer list.
+     */
+    Roaring(std::initializer_list<uint32_t> l) : Roaring() {
+        addMany(l.size(), l.begin());
+    }
+
     /**
      * Copy constructor
      */
@@ -75,8 +82,8 @@ public:
     }
 
     /**
-     * Move constructor. The moved object remains valid, i.e.
-     * all methods can still be called on it.
+     * Move constructor. The moved-from object remains valid but empty, i.e.
+     * it behaves as though it was just freshly constructed.
      */
     Roaring(Roaring &&r) noexcept : roaring(r.roaring) {
         //
@@ -260,6 +267,15 @@ public:
         return *this;
     }
 
+    /**
+     * Assign from an initializer list, replacing the current contents.
+     */
+    Roaring &operator=(std::initializer_list<uint32_t> l) {
+        // Build a temporary from the list and move-assign it into *this
+        *this = Roaring(l);
+        return *this;
+    }
+
     /**
      * Compute the intersection between the current bitmap and the provided
      * bitmap, writing the result in the current bitmap. The provided bitmap
diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index e0416ba2c..cf4b06aae 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -53,6 +53,13 @@ public:
      */
     Roaring64Map(size_t n, const uint64_t *data) { addMany(n, data); }
 
+    /**
+     * Construct a bitmap from an initializer list of 64-bit integer values.
+     */
+    Roaring64Map(std::initializer_list<uint64_t> l) {
+        addMany(l.size(), l.begin());
+    }
+
     /**
      * Construct a 64-bit map from a 32-bit one
      */
@@ -84,7 +91,16 @@ public:
     /**
      * Move assignment operator.
      */
-     Roaring64Map &operator=(Roaring64Map &&r) noexcept = default;
+    Roaring64Map &operator=(Roaring64Map &&r) noexcept = default;
+
+    /**
+     * Assign from an initializer list, replacing the current contents.
+     */
+    Roaring64Map &operator=(std::initializer_list<uint64_t> l) {
+        // Build a temporary from the list and move-assign it into *this
+        *this = Roaring64Map(l);
+        return *this;
+    }
 
     /**
      * Construct a bitmap from a list of uint64_t values.
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index 14948d664..1ed6c60b2 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -336,7 +336,12 @@ void test_example_cpp(bool copy_on_write) {
     // create a new bitmap with initializer list
     Roaring r2i = Roaring::bitmapOfList({1, 2, 3, 5, 6});
 
-    assert(r2i == r2);
+    assert_true(r2i == r2);
+
+    // create a new bitmap directly from initializer list
+    Roaring r2id = {1, 2, 3, 5, 6};
+
+    assert_true(r2id == r2);
 
     // test select
     uint32_t element;
@@ -431,7 +436,8 @@ void test_example_cpp(bool copy_on_write) {
         assert_true(a.contains(10));
         assert_true(a.contains(20));
 
-        // b should be destroyed without any errors
+        // Our move semantics allow moved-from objects to continue to be used
+        // normally (they are reset to empty Roarings).
         assert_true(b.cardinality() == 0);
     }
 
@@ -448,10 +454,27 @@ void test_example_cpp(bool copy_on_write) {
         assert_true(a.contains(10));
         assert_true(a.contains(20));
 
-        // b should be destroyed without any errors
+        // Our move semantics allow moved-from objects to continue to be used
+        // normally (they are reset to empty Roarings).
         assert_int_equal(0, b.cardinality());
     }
 
+    // test initializer lists
+    {
+        Roaring a;
+        a.add(10);
+        a.add(20);
+
+        // construction
+        Roaring b({10, 20});
+        assert_true(a == b);
+
+        a.add(30);
+        // assignment
+        b = {10, 20, 30};
+        assert_true(a == b);
+    }
+
     // test toString
     {
         Roaring a;
@@ -540,7 +563,12 @@ void test_example_cpp_64(bool copy_on_write) {
     Roaring64Map r2i =
         Roaring64Map::bitmapOfList({1, 2, 234294967296, 195839473298,
                                14000000000000000100ull});
-    assert(r2i == r2);
+    assert_true(r2i == r2);
+
+    // create a new bitmap directly from initializer list
+    Roaring64Map r2id = {1, 2, 234294967296, 195839473298,
+                         14000000000000000100ull};
+    assert_true(r2id == r2);
 
     // test select
     uint64_t element;
@@ -817,7 +845,7 @@ DEFINE_TEST(test_cpp_add_range_closed_64) {
     }
 }
 DEFINE_TEST(test_bitmap_of_32) {
-        Roaring r1 = Roaring::bitmapOfList({1,2,4});
+        Roaring r1 = Roaring::bitmapOfList({1, 2, 4});
         r1.printf();
         printf("\n");
         Roaring r2 =
@@ -825,15 +853,29 @@ DEFINE_TEST(test_bitmap_of_32) {
         r2.printf();
         printf("\n");
         assert_true(r1 == r2);
+
+        Roaring r1d = {1, 2, 4};
+        assert_true(r1 == r1d);
+
+        Roaring r3a = Roaring::bitmapOfList({7, 8, 9});
+        r3a = {1, 2, 4};  // overwrite with assignment operator
+        assert_true(r1 == r3a);
 }
 
 DEFINE_TEST(test_bitmap_of_64) {
-        Roaring64Map r1 = Roaring64Map::bitmapOfList({1,2,4});
+        Roaring64Map r1 = Roaring64Map::bitmapOfList({1, 2, 4});
         r1.printf();
         Roaring64Map r2 =
             Roaring64Map::bitmapOf(3, uint64_t(1), uint64_t(2), uint64_t(4));
         r2.printf();
         assert_true(r1 == r2);
+
+        Roaring64Map r1d = {1, 2, 4};
+        assert_true(r1 == r1d);
+
+        Roaring64Map r3a = Roaring64Map::bitmapOfList({7, 8, 9});
+        r3a = {1, 2, 4};  // overwrite with assignment operator
+        assert_true(r1 == r3a);
 }
 
 DEFINE_TEST(test_cpp_add_range_open_64) {

From 7fad89fc785e227f2c72c7f8152f71bbd7bb10ed Mon Sep 17 00:00:00 2001
From: Justin Whear <justin.whear+github@gmail.com>
Date: Mon, 12 Dec 2022 15:41:48 -0800
Subject: [PATCH 037/162] Add link to Zig wrapper to README (#418)

For the last year and a half, I've maintained a Zig wrapper here: https://github.com/jwhear/roaring-zig
---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index d9689a316..80c01487b 100644
--- a/README.md
+++ b/README.md
@@ -648,6 +648,9 @@ Yuce Tekol wrote a D wrapper available at https://github.com/yuce/droaring
 
 Antonio Guilherme Ferreira Viggiano wrote a Redis Module available at https://github.com/aviggiano/redis-roaring
 
+# Zig Wrapper
+
+Justin Whear wrote a Zig wrapper available at https://github.com/jwhear/roaring-zig
 
 
 # Mailing list/discussion group

From dd582fd83d9430d836eb526382e45993062dfd3e Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Sun, 25 Dec 2022 13:33:27 -0500
Subject: [PATCH 038/162] Documentation improvement.

---
 cpp/roaring.hh            |  8 ++++++++
 cpp/roaring64map.hh       |  8 ++++++++
 include/roaring/roaring.h | 10 +++++++++-
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/cpp/roaring.hh b/cpp/roaring.hh
index f02f12400..d48ebdab8 100644
--- a/cpp/roaring.hh
+++ b/cpp/roaring.hh
@@ -280,6 +280,9 @@ public:
      * Compute the intersection between the current bitmap and the provided
      * bitmap, writing the result in the current bitmap. The provided bitmap
      * is not modified.
+     *
+     * Performance hint: if you are computing the intersection between several
+     * bitmaps, two-by-two, it is best to start with the smallest bitmap.
      */
     Roaring &operator&=(const Roaring &r) {
         api::roaring_bitmap_and_inplace(&roaring, &r.roaring);
@@ -612,6 +615,11 @@ public:
     /**
      * Computes the intersection between two bitmaps and returns new bitmap.
      * The current bitmap and the provided bitmap are unchanged.
+     *
+     * Performance hint: if you are computing the intersection between several
+     * bitmaps, two-by-two, it is best to start with the smallest bitmap.
+     * Consider also using the operator &= to avoid needlessly creating
+     * many temporary bitmaps.
      */
     Roaring operator&(const Roaring &o) const {
         roaring_bitmap_t *r = api::roaring_bitmap_and(&roaring, &o.roaring);
diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index cf4b06aae..1b0b97a53 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -492,6 +492,9 @@ public:
      * Compute the intersection of the current bitmap and the provided bitmap,
      * writing the result in the current bitmap. The provided bitmap is not
      * modified.
+     *
+     * Performance hint: if you are computing the intersection between several
+     * bitmaps, two-by-two, it is best to start with the smallest bitmap.
      */
     Roaring64Map &operator&=(const Roaring64Map &other) {
         if (this == &other) {
@@ -1304,6 +1307,11 @@ public:
     /**
      * Computes the intersection between two bitmaps and returns new bitmap.
      * The current bitmap and the provided bitmap are unchanged.
+     *
+     * Performance hint: if you are computing the intersection between several
+     * bitmaps, two-by-two, it is best to start with the smallest bitmap.
+     * Consider also using the operator &= to avoid needlessly creating
+     * many temporary bitmaps.
      */
     Roaring64Map operator&(const Roaring64Map &o) const {
         return Roaring64Map(*this) &= o;
diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h
index 415152445..4283d9a3b 100644
--- a/include/roaring/roaring.h
+++ b/include/roaring/roaring.h
@@ -122,6 +122,11 @@ void roaring_bitmap_printf(const roaring_bitmap_t *r);
 /**
  * Computes the intersection between two bitmaps and returns new bitmap. The
  * caller is responsible for memory management.
+ *
+ * Performance hint: if you are computing the intersection between several
+ * bitmaps, two-by-two, it is best to start with the smallest bitmap.
+ * You may also rely on roaring_bitmap_and_inplace to avoid creating
+ * many temporary bitmaps.
  */
 roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *r1,
                                      const roaring_bitmap_t *r2);
@@ -173,7 +178,10 @@ uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *r1,
 
 /**
  * Inplace version of `roaring_bitmap_and()`, modifies r1
- * r1 == r2 is allowed
+ * r1 == r2 is allowed.
+ *
+ * Performance hint: if you are computing the intersection between several
+ * bitmaps, two-by-two, it is best to start with the smallest bitmap.
  */
 void roaring_bitmap_and_inplace(roaring_bitmap_t *r1,
                                 const roaring_bitmap_t *r2);

From 025d86759304ffeb8491cda6170f0323c544461b Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 5 Jan 2023 17:25:33 -0500
Subject: [PATCH 039/162] Adding test for PyRoaringBitMap issue81

---
 tests/toplevel_unit.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index 05052ff31..630e5524d 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -53,7 +53,8 @@ DEFINE_TEST(range_contains) {
     uint32_t start = end-2;
     roaring_bitmap_t *bm = roaring_bitmap_from_range(start, end-1, 1);
     roaring_bitmap_printf_describe(bm);printf("\n");
-    roaring_bitmap_contains_range(bm, start, end);
+    assert_true(roaring_bitmap_contains_range(bm, start, end-1));
+    assert_false(roaring_bitmap_contains_range(bm, start, end));
     roaring_bitmap_free(bm);
 }
 
@@ -1469,7 +1470,7 @@ DEFINE_TEST(test_contains_range) {
             values[i] = val;
       }
       for (uint64_t i = 0; i < 100000; ++i){
-            if (roaring_bitmap_contains_range(r1, values[i], values[i] + length_range)){
+            if (roaring_bitmap_contains_range(r1, values[i], values[i] + length_range)) {
                 for (uint32_t j = values[i]; j < values[i] + length_range; ++j) assert_true(roaring_bitmap_contains(r1, j));
             }
             else {
@@ -1501,6 +1502,14 @@ DEFINE_TEST(test_contains_range) {
     }
 }
 
+DEFINE_TEST(test_contains_range_PyRoaringBitMap_issue81) {
+    roaring_bitmap_t* r = roaring_bitmap_create();
+    roaring_bitmap_add_range(r, 1, 1900544);
+    assert_true(roaring_bitmap_contains_range(r,1,1900544));
+    assert_false(roaring_bitmap_contains_range(r,1,1900545));
+    roaring_bitmap_free(r);
+}
+
 DEFINE_TEST(test_intersection_array_x_array) {
     roaring_bitmap_t *r1 = roaring_bitmap_create();
     assert_non_null(r1);
@@ -4241,6 +4250,7 @@ int main() {
     tellmeall();
 
     const struct CMUnitTest tests[] = {
+        cmocka_unit_test(test_contains_range_PyRoaringBitMap_issue81),
         cmocka_unit_test(issue316),
         cmocka_unit_test(issue288),
         cmocka_unit_test(issue245),

From 94c645514b0bc28aee07d429d581d6be0d8d8f37 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 5 Jan 2023 17:33:20 -0500
Subject: [PATCH 040/162] Correcting code.

---
 tests/toplevel_unit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index 630e5524d..896cf13b4 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -1506,7 +1506,7 @@ DEFINE_TEST(test_contains_range_PyRoaringBitMap_issue81) {
     roaring_bitmap_t* r = roaring_bitmap_create();
     roaring_bitmap_add_range(r, 1, 1900544);
     assert_true(roaring_bitmap_contains_range(r,1,1900544));
-    assert_false(roaring_bitmap_contains_range(r,1,1900545));
+    assert_false(roaring_bitmap_contains_range(r,1900543,1900545));
     roaring_bitmap_free(r);
 }
 

From 332ccc740a2af6a1e5c5bf0ec73157688da36cc0 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 5 Jan 2023 22:18:12 -0500
Subject: [PATCH 041/162] Minor maintenance (removing unused functions).

---
 amalgamation.sh                         |  1 -
 benchmarks/array_container_benchmark.c  |  4 +--
 benchmarks/bitset_container_benchmark.c |  2 +-
 benchmarks/run_container_benchmark.c    |  4 +--
 include/roaring/containers/array.h      | 11 +++----
 include/roaring/containers/bitset.h     | 26 +++++------------
 include/roaring/containers/containers.h |  4 +--
 include/roaring/containers/run.h        | 12 +++-----
 include/roaring/isadetection.h          | 38 ++++++++++++-------------
 include/roaring/portability.h           | 16 +++++++++--
 src/containers/array.c                  |  1 -
 src/containers/bitset.c                 |  4 +--
 src/containers/run.c                    |  1 -
 tests/bitset_container_unit.c           |  6 ++--
 tests/run_container_unit.c              | 13 +++++++--
 15 files changed, 70 insertions(+), 73 deletions(-)

diff --git a/amalgamation.sh b/amalgamation.sh
index ed3e54000..8cbfba71b 100755
--- a/amalgamation.sh
+++ b/amalgamation.sh
@@ -67,7 +67,6 @@ $SCRIPTPATH/include/roaring/containers/mixed_union.h
 $SCRIPTPATH/include/roaring/containers/mixed_xor.h
 $SCRIPTPATH/include/roaring/containers/containers.h
 $SCRIPTPATH/include/roaring/roaring_array.h
-$SCRIPTPATH/include/roaring/misc/configreport.h
 "
 
 # .c implementation files
diff --git a/benchmarks/array_container_benchmark.c b/benchmarks/array_container_benchmark.c
index a76844ac9..903d0e4ac 100644
--- a/benchmarks/array_container_benchmark.c
+++ b/benchmarks/array_container_benchmark.c
@@ -137,8 +137,8 @@ int main() {
     printf("intersection cardinality = %d \n", answer);
     BEST_TIME(intersection_test(B1, B2, BO), answer, repeat, answer);
     printf("==intersection and union test 2 \n");
-    array_container_clear(B1);
-    array_container_clear(B2);
+    B1->cardinality = 0;
+    B2->cardinality = 0;
     for (int x = 0; x < 1 << 16; x += 16) {
         array_container_add(B1, (uint16_t)x);
     }
diff --git a/benchmarks/bitset_container_benchmark.c b/benchmarks/bitset_container_benchmark.c
index 3d418ee2c..bda9cc9d9 100644
--- a/benchmarks/bitset_container_benchmark.c
+++ b/benchmarks/bitset_container_benchmark.c
@@ -69,7 +69,7 @@ int set_test(bitset_container_t* B) {
 int unset_test(bitset_container_t* B) {
     int x;
     for (x = 0; x < 1 << 16; x += 3) {
-        bitset_container_unset(B, (uint16_t)x);
+        bitset_container_remove(B, (uint16_t)x);
     }
     return 0;
 }
diff --git a/benchmarks/run_container_benchmark.c b/benchmarks/run_container_benchmark.c
index b04170257..9b7256358 100644
--- a/benchmarks/run_container_benchmark.c
+++ b/benchmarks/run_container_benchmark.c
@@ -141,8 +141,8 @@ int main() {
     printf("intersection cardinality = %d \n", answer);
     BEST_TIME(intersection_test(B1, B2, BO), answer, repeat, answer);
     printf("==intersection and union test 2 \n");
-    run_container_clear(B1);
-    run_container_clear(B2);
+    B1->n_runs = 0;
+    B2->n_runs = 0;
     for (int x = 0; x < (1 << 16); x += 64) {
         int length = x % 11;
         for (int y = 0; y < length; ++y)
diff --git a/include/roaring/containers/array.h b/include/roaring/containers/array.h
index 758703569..3a3fe828a 100644
--- a/include/roaring/containers/array.h
+++ b/include/roaring/containers/array.h
@@ -86,10 +86,6 @@ void array_container_copy(const array_container_t *src, array_container_t *dst);
 void array_container_add_from_range(array_container_t *arr, uint32_t min,
                                     uint32_t max, uint16_t step);
 
-/* Set the cardinality to zero (does not release memory). */
-static inline void array_container_clear(array_container_t *array) {
-    array->cardinality = 0;
-}
 
 static inline bool array_container_empty(const array_container_t *array) {
     return array->cardinality == 0;
@@ -448,14 +444,15 @@ static inline void array_container_add_range_nvals(array_container_t *array,
 }
 
 /**
- * Adds all values in range [min,max].
+ * Adds all values in range [min,max]. This function is currently unused
+ * and left as documentation.
  */
-static inline void array_container_add_range(array_container_t *array,
+/*static inline void array_container_add_range(array_container_t *array,
                                              uint32_t min, uint32_t max) {
     int32_t nvals_greater = count_greater(array->array, array->cardinality, max);
     int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min);
     array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater);
-}
+}*/
 
 /*
  * Removes all elements array[pos] .. array[pos+count-1]
diff --git a/include/roaring/containers/bitset.h b/include/roaring/containers/bitset.h
index 2c9e53061..b8c136499 100644
--- a/include/roaring/containers/bitset.h
+++ b/include/roaring/containers/bitset.h
@@ -77,8 +77,8 @@ static inline void bitset_container_set(bitset_container_t *bitset,
     bitset->words[offset] = load;
 }
 
-/* Unset the ith bit.  */
-static inline void bitset_container_unset(bitset_container_t *bitset,
+/* Unset the ith bit. Currently unused. Could be used for optimization. */
+/*static inline void bitset_container_unset(bitset_container_t *bitset,
                                           uint16_t pos) {
     uint64_t shift = 6;
     uint64_t offset;
@@ -87,7 +87,7 @@ static inline void bitset_container_unset(bitset_container_t *bitset,
     uint64_t load = bitset->words[offset];
     ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality);
     bitset->words[offset] = load;
-}
+}*/
 
 /* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower
  * than bitset_container_set.  */
@@ -142,15 +142,15 @@ static inline void bitset_container_set(bitset_container_t *bitset,
     bitset->words[pos >> 6] = new_word;
 }
 
-/* Unset the ith bit.  */
-static inline void bitset_container_unset(bitset_container_t *bitset,
+/* Unset the ith bit. Currently unused.  */
+/*static inline void bitset_container_unset(bitset_container_t *bitset,
                                           uint16_t pos) {
     const uint64_t old_word = bitset->words[pos >> 6];
     const int index = pos & 63;
     const uint64_t new_word = old_word & (~(UINT64_C(1) << index));
     bitset->cardinality -= (uint32_t)((old_word ^ new_word) >> index);
     bitset->words[pos >> 6] = new_word;
-}
+}*/
 
 /* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower
  * than bitset_container_set.  */
@@ -254,19 +254,7 @@ void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min,
  * bitset->cardinality =  bitset_container_compute_cardinality(bitset).*/
 int bitset_container_compute_cardinality(const bitset_container_t *bitset);
 
-/* Get whether there is at least one bit set  (see bitset_container_empty for the reverse),
-   when the cardinality is unknown, it is computed and stored in the struct */
-static inline bool bitset_container_nonzero_cardinality(
-    bitset_container_t *bitset) {
-    // account for laziness
-    if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) {
-        // could bail early instead with a nonzero result
-        bitset->cardinality = bitset_container_compute_cardinality(bitset);
-    }
-    return bitset->cardinality > 0;
-}
-
-/* Check whether this bitset is empty (see bitset_container_nonzero_cardinality for the reverse),
+/* Check whether this bitset is empty,
  *  it never modifies the bitset struct. */
 static inline bool bitset_container_empty(
     const bitset_container_t *bitset) {
diff --git a/include/roaring/containers/containers.h b/include/roaring/containers/containers.h
index ce8f86283..ad78515d1 100644
--- a/include/roaring/containers/containers.h
+++ b/include/roaring/containers/containers.h
@@ -182,7 +182,7 @@ static inline bitset_container_t *container_to_bitset(
  * Get the container name from the typecode
  * (unused at time of writing)
  */
-static inline const char *get_container_name(uint8_t typecode) {
+/*static inline const char *get_container_name(uint8_t typecode) {
     switch (typecode) {
         case BITSET_CONTAINER_TYPE:
             return container_names[0];
@@ -197,7 +197,7 @@ static inline const char *get_container_name(uint8_t typecode) {
             __builtin_unreachable();
             return "unknown";
     }
-}
+}*/
 
 static inline const char *get_full_container_name(
     const container_t *c, uint8_t typecode
diff --git a/include/roaring/containers/run.h b/include/roaring/containers/run.h
index 793fc01d8..4b01d5987 100644
--- a/include/roaring/containers/run.h
+++ b/include/roaring/containers/run.h
@@ -305,11 +305,6 @@ static inline bool run_container_empty(
 /* Copy one container into another. We assume that they are distinct. */
 void run_container_copy(const run_container_t *src, run_container_t *dst);
 
-/* Set the cardinality to zero (does not release memory). */
-static inline void run_container_clear(run_container_t *run) {
-    run->n_runs = 0;
-}
-
 /**
  * Append run described by vl to the run container, possibly merging.
  * It is assumed that the run would be inserted at the end of the container, no
@@ -610,14 +605,15 @@ static inline void run_container_add_range_nruns(run_container_t* run,
 }
 
 /**
- * Add all values in range [min, max]
+ * Add all values in range [min, max]. This function is currently unused
+ * and left as documentation.
  */
-static inline void run_container_add_range(run_container_t* run,
+/*static inline void run_container_add_range(run_container_t* run,
                                            uint32_t min, uint32_t max) {
     int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max);
     int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min);
     run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater);
-}
+}*/
 
 /**
  * Shifts last $count elements either left (distance < 0) or right (distance > 0)
diff --git a/include/roaring/isadetection.h b/include/roaring/isadetection.h
index 732903756..69c25a073 100644
--- a/include/roaring/isadetection.h
+++ b/include/roaring/isadetection.h
@@ -76,23 +76,23 @@ enum croaring_instruction_set {
 
 #if defined(__PPC64__)
 
-static inline uint32_t dynamic_croaring_detect_supported_architectures() {
-  return CROARING_ALTIVEC;
-}
+//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
+//  return CROARING_ALTIVEC;
+//}
 
 #elif defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64
 
 #if defined(__ARM_NEON)
 
-static inline uint32_t dynamic_croaring_detect_supported_architectures() {
-  return CROARING_NEON;
-}
+//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
+//  return CROARING_NEON;
+//}
 
 #else // ARM without NEON
 
-static inline uint32_t dynamic_croaring_detect_supported_architectures() {
-  return CROARING_DEFAULT;
-}
+//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
+//  return CROARING_DEFAULT;
+//}
 
 #endif
 
@@ -165,9 +165,9 @@ static inline uint32_t dynamic_croaring_detect_supported_architectures() {
 #else // fallback
 
 
-static inline uint32_t dynamic_croaring_detect_supported_architectures() {
-  return CROARING_DEFAULT;
-}
+//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
+//  return CROARING_DEFAULT;
+//}
 
 
 #endif // end SIMD extension detection code
@@ -220,14 +220,14 @@ static inline bool croaring_avx2() {
 
 #else // defined(__x86_64__) || defined(_M_AMD64) // x64
 
-static inline bool croaring_avx2() {
-  return false;
-}
+//static inline bool croaring_avx2() {
+//  return false;
+//}
 
-static inline uint32_t croaring_detect_supported_architectures() {
-    // no runtime dispatch
-    return dynamic_croaring_detect_supported_architectures();
-}
+//static inline uint32_t croaring_detect_supported_architectures() {
+//    // no runtime dispatch
+//    return dynamic_croaring_detect_supported_architectures();
+//}
 #endif // defined(__x86_64__) || defined(_M_AMD64) // x64
 
 #endif // ROARING_ISADETECTION_H
diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index d16a4daa9..5b0a942ee 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -39,7 +39,11 @@
 #endif // __clang__
 #endif // _MSC_VER
 
-#if !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L)
+#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L)
+#undef _POSIX_C_SOURCE
+#endif
+
+#ifndef _POSIX_C_SOURCE
 #define _POSIX_C_SOURCE 200809L
 #endif // !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L)
 #if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700)
@@ -226,6 +230,10 @@ inline int __builtin_clzll(unsigned long long input_num) {
 
 #define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100)
 
+#ifdef USENEON
+// we can always compute the popcount fast.
+#elif (defined(_M_ARM) || defined(_M_ARM64)) && (defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO)
+// we will need this function:
 static inline int hammingbackup(uint64_t x) {
   uint64_t c1 = UINT64_C(0x5555555555555555);
   uint64_t c2 = UINT64_C(0x3333333333333333);
@@ -235,10 +243,14 @@ static inline int hammingbackup(uint64_t x) {
   x *= UINT64_C(0x0101010101010101);
   return x >> 56;
 }
+#endif
+
 
 static inline int hamming(uint64_t x) {
 #if defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO
-#ifdef _M_ARM64
+#ifdef USENEON
+   return vaddv_u8(vcnt_u8(vcreate_u8(x)));  // NEON: per-byte bit counts, summed across lanes
+#elif defined(_M_ARM64)
   return hammingbackup(x);
   // (int) _CountOneBits64(x); is unavailable
 #else  // _M_ARM64
diff --git a/src/containers/array.c b/src/containers/array.c
index dd9632062..b4adc2de9 100644
--- a/src/containers/array.c
+++ b/src/containers/array.c
@@ -23,7 +23,6 @@ extern inline bool array_container_contains(const array_container_t *arr,
                                             uint16_t pos);
 extern inline int array_container_cardinality(const array_container_t *array);
 extern inline bool array_container_nonzero_cardinality(const array_container_t *array);
-extern inline void array_container_clear(array_container_t *array);
 extern inline int32_t array_container_serialized_size_in_bytes(int32_t card);
 extern inline bool array_container_empty(const array_container_t *array);
 extern inline bool array_container_full(const array_container_t *array);
diff --git a/src/containers/bitset.c b/src/containers/bitset.c
index c03d32a9f..31421846a 100644
--- a/src/containers/bitset.c
+++ b/src/containers/bitset.c
@@ -21,9 +21,9 @@ extern "C" { namespace roaring { namespace internal {
 #endif
 
 extern inline int bitset_container_cardinality(const bitset_container_t *bitset);
-extern inline bool bitset_container_nonzero_cardinality(bitset_container_t *bitset);
 extern inline void bitset_container_set(bitset_container_t *bitset, uint16_t pos);
-extern inline void bitset_container_unset(bitset_container_t *bitset, uint16_t pos);
+// unused at this time:
+//extern inline void bitset_container_unset(bitset_container_t *bitset, uint16_t pos);
 extern inline bool bitset_container_get(const bitset_container_t *bitset,
                                         uint16_t pos);
 extern inline int32_t bitset_container_serialized_size_in_bytes(void);
diff --git a/src/containers/run.c b/src/containers/run.c
index 6c14eef6a..ee8a4bcf3 100644
--- a/src/containers/run.c
+++ b/src/containers/run.c
@@ -18,7 +18,6 @@ extern inline bool run_container_contains(const run_container_t *run,
 extern inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x);
 extern inline bool run_container_is_full(const run_container_t *run);
 extern inline bool run_container_nonzero_cardinality(const run_container_t *rc);
-extern inline void run_container_clear(run_container_t *run);
 extern inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs);
 extern inline run_container_t *run_container_create_range(uint32_t start,
                                                    uint32_t stop);
diff --git a/tests/bitset_container_unit.c b/tests/bitset_container_unit.c
index 7de6f6826..64d752d5b 100644
--- a/tests/bitset_container_unit.c
+++ b/tests/bitset_container_unit.c
@@ -79,7 +79,7 @@ DEFINE_TEST(set_get_test) {
                      (1 << 16) / 3 + 1);
 
     for (size_t x = 0; x < 1 << 16; x += 3) {
-        bitset_container_unset(B, x);
+        bitset_container_remove(B, x);
     }
 
     assert_int_equal(bitset_container_cardinality(B), 0);
@@ -154,7 +154,7 @@ DEFINE_TEST(xor_test) {
     }
 
     for (size_t x = 0; x < (1 << 16); x += 62 * 3) {
-        bitset_container_unset(BI, x);
+        bitset_container_remove(BI, x);
     }
 
     bitset_container_xor(B1, B2, TMP);
@@ -185,7 +185,7 @@ DEFINE_TEST(andnot_test) {
     // important: 62 is not divisible by 3
     for (size_t x = 0; x < (1 << 16); x += 62) {
         bitset_container_set(B2, x);
-        bitset_container_unset(BI, x);
+        bitset_container_remove(BI, x);
     }
 
     const int expected = bitset_container_compute_cardinality(BI);
diff --git a/tests/run_container_unit.c b/tests/run_container_unit.c
index dbf08f253..94adf6d88 100644
--- a/tests/run_container_unit.c
+++ b/tests/run_container_unit.c
@@ -171,11 +171,18 @@ DEFINE_TEST(select_test) {
     run_container_free(B);
 }
 
+static inline void _run_container_add_range(run_container_t* run,
+                                           uint32_t min, uint32_t max) {
+    int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max);
+    int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min);
+    run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater);
+}
+
 DEFINE_TEST(remove_range_test) {
     run_container_t* run = run_container_create();
-    run_container_add_range(run, 100, 150);
-    run_container_add_range(run, 200, 250);
-    run_container_add_range(run, 300, 350);
+    _run_container_add_range(run, 100, 150);
+    _run_container_add_range(run, 200, 250);
+    _run_container_add_range(run, 300, 350);
 
     // act on left-most run
     run_container_remove_range(run, 100, 110);

From 6fee997d9ed94dab3f75ca78d3fbade22415a240 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Fri, 6 Jan 2023 03:25:59 +0000
Subject: [PATCH 042/162] Minor fixes

---
 tests/cpp_random_unit.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/cpp_random_unit.cpp b/tests/cpp_random_unit.cpp
index a9da75ff7..fc49c3239 100644
--- a/tests/cpp_random_unit.cpp
+++ b/tests/cpp_random_unit.cpp
@@ -131,7 +131,7 @@ Roaring64Map make_random_bitset64() {
                 uint64_t card = r.cardinality();
                 if (card != 0) {
                     uint64_t rnk = rand() % card;
-                    uint64_t element;
+                    uint64_t element = 0;
                     assert_true(r.select(rnk, &element));
                     assert_int_equal(rnk + 1, r.rank(element));
                     r.remove(rnk);
@@ -424,7 +424,7 @@ DEFINE_TEST(random_doublecheck_test_64) {
                 uint64_t card = out.cardinality();
                 if (card != 0) {  // pick gravity point inside set somewhere
                     uint64_t rnk = rand() % card;
-                    uint64_t element;
+                    uint64_t element = 0;
                     assert_true(out.select(rnk, &element));
                     assert_int_equal(rnk + 1, out.rank(element));
                     gravity64 = element;

From 4fc13f1e54f0ae8e0bcd6bde5225c2bb3de61111 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 5 Jan 2023 22:26:17 -0500
Subject: [PATCH 043/162] Minor fix

---
 include/roaring/misc/configreport.h | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/include/roaring/misc/configreport.h b/include/roaring/misc/configreport.h
index 7e3c3c1d6..05b214505 100644
--- a/include/roaring/misc/configreport.h
+++ b/include/roaring/misc/configreport.h
@@ -177,14 +177,6 @@ static inline void tellmeall() {
 #ifdef __VERSION__
     printf(" compiler version: %s\t", __VERSION__);
 #endif
-    uint32_t config =  croaring_detect_supported_architectures();
-    if((config & CROARING_NEON) == CROARING_NEON) {
-        printf(" NEON detected\t");
-    }
-    if((config & CROARING_ALTIVEC) == CROARING_ALTIVEC) {
-        printf("Altivec detected\n");
-    }
-
     if ((sizeof(int) != 4) || (sizeof(long) != 8)) {
         printf("number of bytes: int = %lu long = %lu \n",
                (long unsigned int)sizeof(size_t),

From 79efb65e364102963f63354b440620a741648fd5 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 5 Jan 2023 22:39:05 -0500
Subject: [PATCH 044/162] Please don't rely on asserts in tests !!!

---
 tests/add_offset.c            | 16 +++----
 tests/c_example1.c            | 21 +++++----
 tests/cpp_example2.cpp        | 11 +++--
 tests/mixed_container_unit.c  |  4 +-
 tests/realdata_unit.c         | 41 +++++++++--------
 tests/roaring64map_checked.hh | 50 ++++++++++----------
 tests/roaring_checked.hh      | 86 +++++++++++++++++------------------
 7 files changed, 117 insertions(+), 112 deletions(-)

diff --git a/tests/add_offset.c b/tests/add_offset.c
index 211b3ae50..edd515d97 100644
--- a/tests/add_offset.c
+++ b/tests/add_offset.c
@@ -41,7 +41,7 @@ static int setup_container_add_offset_test(void **state_) {
     switch (test.type) {
     case BITSET_CONTAINER_TYPE:
         bc = bitset_container_create();
-        assert(bc != NULL);
+        assert_true(bc != NULL);
         for (size_t i = 0; i < test.n_values; i++) {
             bitset_container_add(bc, test.values[i]);
         }
@@ -49,7 +49,7 @@ static int setup_container_add_offset_test(void **state_) {
         break;
     case ARRAY_CONTAINER_TYPE:
         ac = array_container_create();
-        assert(ac != NULL);
+        assert_true(ac != NULL);
         for (size_t i = 0; i < test.n_values; i++) {
             array_container_add(ac, test.values[i]);
         }
@@ -57,14 +57,14 @@ static int setup_container_add_offset_test(void **state_) {
         break;
     case RUN_CONTAINER_TYPE:
         rc = run_container_create();
-        assert(rc != NULL);
+        assert_true(rc != NULL);
         for (size_t i = 0; i < test.n_values; i++) {
             run_container_add(rc, test.values[i]);
         }
         state->in = rc;
         break;
     default:
-        assert(false); // To catch buggy tests.
+        assert_true(false); // To catch buggy tests.
     }
 
     return 0;
@@ -104,7 +104,7 @@ static void container_add_offset_test(void **state_) {
     uint8_t type = test.type;
     int card_lo = 0, card_hi = 0;
 
-    assert(test.n_values > 0);
+    assert_true(test.n_values > 0);
 
     container_add_offset(state->in, type, &state->lo, &state->hi, offset);
     container_add_offset(state->in, type, NULL, &state->hi_only, offset);
@@ -158,7 +158,7 @@ static int setup_roaring_add_offset_test(void **state_) {
     roaring_add_offset_test_case_t test = state->test_case;
 
     state->in = roaring_bitmap_of_ptr(test.n_values, test.values);
-    assert(state->in != NULL);
+    assert_true(state->in != NULL);
 
     return 0;
 }
@@ -314,7 +314,7 @@ int main() {
     dense_bitmap[i++] = 400000;
     dense_bitmap[i++] = 1400000;
 
-    assert(i == ARRAY_SIZE(dense_bitmap));
+    assert_true(i == ARRAY_SIZE(dense_bitmap));
 
     // NB: only add positive offsets, the test function takes care of also
     // running a negative test for that offset.
@@ -338,7 +338,7 @@ int main() {
         roaring_add_offset_test_state_t state = ROARING_ADD_OFFSET_TEST_CASE(dense_bitmap, offset);
         roaring_state[i++] = state;
     }
-    assert(i <= ARRAY_SIZE(roaring_state));
+    assert_true(i <= ARRAY_SIZE(roaring_state));
 
     i = j = 0;
     struct CMUnitTest tests[ARRAY_SIZE(container_state)+ARRAY_SIZE(roaring_state)];
diff --git a/tests/c_example1.c b/tests/c_example1.c
index 833eeb9ce..a0d3480af 100644
--- a/tests/c_example1.c
+++ b/tests/c_example1.c
@@ -2,6 +2,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
+#include "test.h"
 
 bool roaring_iterator_sumall(uint32_t value, void *param) {
     *(uint32_t *)param += value;
@@ -14,7 +15,7 @@ int main() {
     // then we can add values
     for (uint32_t i = 100; i < 1000; i++) roaring_bitmap_add(r1, i);
     // check whether a value is contained
-    assert(roaring_bitmap_contains(r1, 500));
+    assert_true(roaring_bitmap_contains(r1, 500));
     // compute how many bits there are:
     uint32_t cardinality = roaring_bitmap_get_cardinality(r1);
     printf("Cardinality = %d \n", cardinality);
@@ -38,24 +39,24 @@ int main() {
     // we can also go in reverse and go from arrays to bitmaps
     uint64_t card1 = roaring_bitmap_get_cardinality(r1);
     uint32_t *arr1 = (uint32_t *)malloc(card1 * sizeof(uint32_t));
-    assert(arr1 != NULL);
+    assert_true(arr1 != NULL);
     roaring_bitmap_to_uint32_array(r1, arr1);
     roaring_bitmap_t *r1f = roaring_bitmap_of_ptr(card1, arr1);
     free(arr1);
-    assert(roaring_bitmap_equals(r1, r1f));  // what we recover is equal
+    assert_true(roaring_bitmap_equals(r1, r1f));  // what we recover is equal
     roaring_bitmap_free(r1f);
 
     // we can go from arrays to bitmaps from "offset" by "limit"
     size_t offset = 100;
     size_t limit = 1000;
     uint32_t *arr3 = (uint32_t *)malloc(limit * sizeof(uint32_t));
-    assert(arr3 != NULL);
+    assert_true(arr3 != NULL);
     roaring_bitmap_range_uint32_array(r1, offset, limit, arr3);
     free(arr3);
 
     // we can copy and compare bitmaps
     roaring_bitmap_t *z = roaring_bitmap_copy(r3);
-    assert(roaring_bitmap_equals(r3, z));  // what we recover is equal
+    assert_true(roaring_bitmap_equals(r3, z));  // what we recover is equal
     roaring_bitmap_free(z);
 
     // we can compute union two-by-two
@@ -65,12 +66,12 @@ int main() {
     // we can compute a big union
     const roaring_bitmap_t *allmybitmaps[] = {r1, r2, r3};
     roaring_bitmap_t *bigunion = roaring_bitmap_or_many(3, allmybitmaps);
-    assert(
+    assert_true(
         roaring_bitmap_equals(r1_2_3, bigunion));  // what we recover is equal
     // can also do the big union with a heap
     roaring_bitmap_t *bigunionheap =
         roaring_bitmap_or_many_heap(3, allmybitmaps);
-    assert(roaring_bitmap_equals(r1_2_3, bigunionheap));
+    assert_true(roaring_bitmap_equals(r1_2_3, bigunionheap));
 
     roaring_bitmap_free(r1_2_3);
     roaring_bitmap_free(bigunion);
@@ -85,18 +86,18 @@ int main() {
     char *serializedbytes = (char*)malloc(expectedsize);
     roaring_bitmap_portable_serialize(r1, serializedbytes);
     roaring_bitmap_t *t = roaring_bitmap_portable_deserialize(serializedbytes);
-    assert(roaring_bitmap_equals(r1, t));  // what we recover is equal
+    assert_true(roaring_bitmap_equals(r1, t));  // what we recover is equal
     roaring_bitmap_free(t);
     // we can also check whether there is a bitmap at a memory location without
     // reading it
     size_t sizeofbitmap =
         roaring_bitmap_portable_deserialize_size(serializedbytes, expectedsize);
     printf("sizeofbitmap = %zu \n", sizeofbitmap);
-    assert(sizeofbitmap ==
+    assert_true(sizeofbitmap ==
            expectedsize);  // sizeofbitmap would be zero if no bitmap were found
     // we can also read the bitmap "safely" by specifying a byte size limit:
     t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
-    assert(roaring_bitmap_equals(r1, t));  // what we recover is equal
+    assert_true(roaring_bitmap_equals(r1, t));  // what we recover is equal
     roaring_bitmap_free(t);
 
     free(serializedbytes);
diff --git a/tests/cpp_example2.cpp b/tests/cpp_example2.cpp
index 9005712d0..bea031345 100644
--- a/tests/cpp_example2.cpp
+++ b/tests/cpp_example2.cpp
@@ -1,6 +1,7 @@
 #include <iostream>
 
 #include "roaring.hh"
+#include "test.h"
 
 using namespace roaring;
 
@@ -11,7 +12,7 @@ int main() {
     }
 
     // check whether a value is contained
-    assert(r1.contains(500));
+    assert_true(r1.contains(500));
 
     // compute how many bits there are:
     uint32_t cardinality = r1.cardinality();
@@ -47,11 +48,11 @@ int main() {
     delete[] arr1;
 
     // bitmaps shall be equal
-    assert(r1 == r1f);
+    assert_true(r1 == r1f);
 
     // we can copy and compare bitmaps
     Roaring z(r3);
-    assert(r3 == z);
+    assert_true(r3 == z);
 
     // we can compute union two-by-two
     Roaring r1_2_3 = r1 | r2;
@@ -60,7 +61,7 @@ int main() {
     // we can compute a big union
     const Roaring *allmybitmaps[] = {&r1, &r2, &r3};
     Roaring bigunion = Roaring::fastunion(3, allmybitmaps);
-    assert(r1_2_3 == bigunion);
+    assert_true(r1_2_3 == bigunion);
 
     // we can compute intersection two-by-two
     Roaring i1_2 = r1 & r2;
@@ -70,7 +71,7 @@ int main() {
     char *serializedbytes = new char[expectedsize];
     r1.write(serializedbytes);
     Roaring t = Roaring::read(serializedbytes);
-    assert(r1 == t);
+    assert_true(r1 == t);
     delete[] serializedbytes;
 
     // we can iterate over all values using custom functions
diff --git a/tests/mixed_container_unit.c b/tests/mixed_container_unit.c
index c55a59dc3..72964eba7 100644
--- a/tests/mixed_container_unit.c
+++ b/tests/mixed_container_unit.c
@@ -1588,7 +1588,7 @@ static int run_negation_range_tests(int k, int h, int start_offset, int r_start,
     int result_size_should_be;
     bool result_should_be[1 << 16];
 
-    assert(h < k);  // bad test call otherwise..not failure of code under test
+    assert_true(h < k);  // bad test call otherwise..not failure of code under test
 
     int runlen = h;
     for (int x = 0; x < (1 << 16) - start_offset; x++) {
@@ -1669,7 +1669,7 @@ static int run_negation_range_tests_simpler(int k, int h, int start_offset,
     int result_size_should_be;
     bool result_should_be[1 << 16];
 
-    assert(h < k);
+    assert_true(h < k);
 
     int runlen = h;
     for (int x = 0; x < (1 << 16) - start_offset; x++) {
diff --git a/tests/realdata_unit.c b/tests/realdata_unit.c
index 5603f5206..1d37a3d18 100644
--- a/tests/realdata_unit.c
+++ b/tests/realdata_unit.c
@@ -16,6 +16,7 @@
 
 #include "../benchmarks/numbersfromtextfiles.h"
 #include "config.h"
+#include "test.h"
 
 /**
  * Once you have collected all the integers, build the bitmaps.
@@ -645,17 +646,17 @@ bool compare_wide_unions(roaring_bitmap_t **rnorun, roaring_bitmap_t **rruns,
         printf("[compare_wide_unions] Unions don't agree! (fast run-norun) \n");
         return false;
     }
-    assert(roaring_bitmap_equals(tempornorun, temporruns));
+    assert_true(roaring_bitmap_equals(tempornorun, temporruns));
 
     roaring_bitmap_t *tempornorunheap =
         roaring_bitmap_or_many_heap(count, (const roaring_bitmap_t **)rnorun);
     roaring_bitmap_t *temporrunsheap =
         roaring_bitmap_or_many_heap(count, (const roaring_bitmap_t **)rruns);
-    // assert(slow_bitmap_equals(tempornorun, tempornorunheap));
-    // assert(slow_bitmap_equals(temporruns,temporrunsheap));
+    // assert_true(slow_bitmap_equals(tempornorun, tempornorunheap));
+    // assert_true(slow_bitmap_equals(temporruns,temporrunsheap));
 
-    assert(roaring_bitmap_equals(tempornorun, tempornorunheap));
-    assert(roaring_bitmap_equals(temporruns, temporrunsheap));
+    assert_true(roaring_bitmap_equals(tempornorun, tempornorunheap));
+    assert_true(roaring_bitmap_equals(temporruns, temporrunsheap));
     roaring_bitmap_free(tempornorunheap);
     roaring_bitmap_free(temporrunsheap);
 
@@ -665,24 +666,24 @@ bool compare_wide_unions(roaring_bitmap_t **rnorun, roaring_bitmap_t **rruns,
         longtempornorun = rnorun[0];
         longtemporruns = rruns[0];
     } else {
-        assert(roaring_bitmap_equals(rnorun[0], rruns[0]));
-        assert(roaring_bitmap_equals(rnorun[1], rruns[1]));
+        assert_true(roaring_bitmap_equals(rnorun[0], rruns[0]));
+        assert_true(roaring_bitmap_equals(rnorun[1], rruns[1]));
         longtempornorun = roaring_bitmap_or(rnorun[0], rnorun[1]);
         longtemporruns = roaring_bitmap_or(rruns[0], rruns[1]);
-        assert(roaring_bitmap_equals(longtempornorun, longtemporruns));
+        assert_true(roaring_bitmap_equals(longtempornorun, longtemporruns));
         for (int i = 2; i < (int)count; ++i) {
-            assert(roaring_bitmap_equals(rnorun[i], rruns[i]));
-            assert(roaring_bitmap_equals(longtempornorun, longtemporruns));
+            assert_true(roaring_bitmap_equals(rnorun[i], rruns[i]));
+            assert_true(roaring_bitmap_equals(longtempornorun, longtemporruns));
 
             roaring_bitmap_t *t1 =
                 roaring_bitmap_or(rnorun[i], longtempornorun);
             roaring_bitmap_t *t2 = roaring_bitmap_or(rruns[i], longtemporruns);
-            assert(roaring_bitmap_equals(t1, t2));
+            assert_true(roaring_bitmap_equals(t1, t2));
             roaring_bitmap_free(longtempornorun);
             longtempornorun = t1;
             roaring_bitmap_free(longtemporruns);
             longtemporruns = t2;
-            assert(roaring_bitmap_equals(longtempornorun, longtemporruns));
+            assert_true(roaring_bitmap_equals(longtempornorun, longtemporruns));
         }
     }
     if (!slow_bitmap_equals(longtempornorun, tempornorun)) {
@@ -712,7 +713,7 @@ bool compare_wide_xors(roaring_bitmap_t **rnorun, roaring_bitmap_t **rruns,
         printf("[compare_wide_xors] Xors don't agree! (fast run-norun) \n");
         return false;
     }
-    assert(roaring_bitmap_equals(tempornorun, temporruns));
+    assert_true(roaring_bitmap_equals(tempornorun, temporruns));
 
     roaring_bitmap_t *longtempornorun;
     roaring_bitmap_t *longtemporruns;
@@ -720,24 +721,24 @@ bool compare_wide_xors(roaring_bitmap_t **rnorun, roaring_bitmap_t **rruns,
         longtempornorun = rnorun[0];
         longtemporruns = rruns[0];
     } else {
-        assert(roaring_bitmap_equals(rnorun[0], rruns[0]));
-        assert(roaring_bitmap_equals(rnorun[1], rruns[1]));
+        assert_true(roaring_bitmap_equals(rnorun[0], rruns[0]));
+        assert_true(roaring_bitmap_equals(rnorun[1], rruns[1]));
         longtempornorun = roaring_bitmap_xor(rnorun[0], rnorun[1]);
         longtemporruns = roaring_bitmap_xor(rruns[0], rruns[1]);
-        assert(roaring_bitmap_equals(longtempornorun, longtemporruns));
+        assert_true(roaring_bitmap_equals(longtempornorun, longtemporruns));
         for (int i = 2; i < (int)count; ++i) {
-            assert(roaring_bitmap_equals(rnorun[i], rruns[i]));
-            assert(roaring_bitmap_equals(longtempornorun, longtemporruns));
+            assert_true(roaring_bitmap_equals(rnorun[i], rruns[i]));
+            assert_true(roaring_bitmap_equals(longtempornorun, longtemporruns));
 
             roaring_bitmap_t *t1 =
                 roaring_bitmap_xor(rnorun[i], longtempornorun);
             roaring_bitmap_t *t2 = roaring_bitmap_xor(rruns[i], longtemporruns);
-            assert(roaring_bitmap_equals(t1, t2));
+            assert_true(roaring_bitmap_equals(t1, t2));
             roaring_bitmap_free(longtempornorun);
             longtempornorun = t1;
             roaring_bitmap_free(longtemporruns);
             longtemporruns = t2;
-            assert(roaring_bitmap_equals(longtempornorun, longtemporruns));
+            assert_true(roaring_bitmap_equals(longtempornorun, longtemporruns));
         }
     }
     if (!slow_bitmap_equals(longtempornorun, tempornorun)) {
diff --git a/tests/roaring64map_checked.hh b/tests/roaring64map_checked.hh
index d31276fa9..d195b205a 100644
--- a/tests/roaring64map_checked.hh
+++ b/tests/roaring64map_checked.hh
@@ -35,7 +35,9 @@
 #include <string>
 
 #include <set>  // sorted set, typically a red-black tree implementation
-#include <assert.h>
+
+#include "test.h"
+
 
 #define ROARING_CPP_NAMESPACE unchecked  // can't be overridden if global
 #include "roaring64map.hh"  // contains Roaring64Map unchecked class
@@ -104,14 +106,14 @@ class Roaring64Map {
     bool addChecked(uint32_t x) {
         bool ans = plain.addChecked(x);
         bool was_in_set = check.insert(x).second;  // insert -> pair<iter,bool>
-        assert(ans == was_in_set);
+        assert_true(ans == was_in_set);
         (void)was_in_set;  // unused besides assert
         return ans;
     }
     bool addChecked(uint64_t x) {
         bool ans = plain.addChecked(x);
         bool was_in_set = check.insert(x).second;  // insert -> pair<iter,bool>
-        assert(ans == was_in_set);
+        assert_true(ans == was_in_set);
         (void)was_in_set;  // unused besides assert
         return ans;
     }
@@ -161,14 +163,14 @@ class Roaring64Map {
     bool removeChecked(uint32_t x) {
         bool ans = plain.removeChecked(x);
         size_t num_removed = check.erase(x);
-        assert(ans == (num_removed == 1));
+        assert_true(ans == (num_removed == 1));
         (void)num_removed;  // unused besides assert
         return ans;
     }
     bool removeChecked(uint64_t x) {
         bool ans = plain.removeChecked(x);
         size_t num_removed = check.erase(x);
-        assert(ans == (num_removed == 1));
+        assert_true(ans == (num_removed == 1));
         (void)num_removed;  // unused besides assert
         return ans;
     }
@@ -211,13 +213,13 @@ class Roaring64Map {
 
     uint64_t maximum() const {
         uint64_t ans = plain.maximum();
-        assert(check.empty() ? ans == 0 : ans == *check.rbegin());
+        assert_true(check.empty() ? ans == 0 : ans == *check.rbegin());
         return ans;
     }
 
     uint64_t minimum() const {
         uint64_t ans = plain.minimum();
-        assert(check.empty()
+        assert_true(check.empty()
             ? ans == (std::numeric_limits<uint64_t>::max)()
             : ans == *check.begin());
         return ans;
@@ -225,12 +227,12 @@ class Roaring64Map {
 
     bool contains(uint32_t x) const {
         bool ans = plain.contains(x);
-        assert(ans == (check.find(x) != check.end()));
+        assert_true(ans == (check.find(x) != check.end()));
         return ans;
     }
     bool contains(uint64_t x) const {
         bool ans = plain.contains(x);
-        assert(ans == (check.find(x) != check.end()));
+        assert_true(ans == (check.find(x) != check.end()));
         return ans;
     }
 
@@ -253,7 +255,7 @@ class Roaring64Map {
     }
 
     ~Roaring64Map() {
-        assert(does_std_set_match_roaring());  // always check on destructor
+        assert_true(does_std_set_match_roaring());  // always check on destructor
     }
 
     Roaring64Map &operator=(const Roaring64Map &r) {
@@ -331,19 +333,19 @@ class Roaring64Map {
 
     uint64_t cardinality() const {
         uint64_t ans = plain.cardinality();
-        assert(ans == check.size());
+        assert_true(ans == check.size());
         return ans;
     }
 
     bool isEmpty() const {
         bool ans = plain.isEmpty();
-        assert(ans == check.empty());
+        assert_true(ans == check.empty());
         return ans;
     }
 
     bool isSubset(const Roaring64Map &r) const {  // is `this` subset of `r`?
         bool ans = plain.isSubset(r.plain);
-        assert(ans == std::includes(
+        assert_true(ans == std::includes(
             r.check.begin(), r.check.end(),  // containing range
             check.begin(), check.end()  // range to test for containment
         ));
@@ -352,7 +354,7 @@ class Roaring64Map {
 
     bool isStrictSubset(const Roaring64Map &r) const {  // is `this` subset of `r`?
         bool ans = plain.isStrictSubset(r.plain);
-        assert(ans == (std::includes(
+        assert_true(ans == (std::includes(
             r.check.begin(), r.check.end(),  // containing range
             check.begin(), check.end()  // range to test for containment
         ) && r.check.size() > check.size()));
@@ -366,7 +368,7 @@ class Roaring64Map {
 
     bool operator==(const Roaring64Map &r) const {
         bool ans = (plain == r.plain);
-        assert(ans == (check == r.check));
+        assert_true(ans == (check == r.check));
         return ans;
     }
 
@@ -399,7 +401,7 @@ class Roaring64Map {
 
     void iterate(roaring::api::roaring_iterator64 iterator, void *ptr) const {
         plain.iterate(iterator, ptr);
-        assert(does_std_set_match_roaring());  // checks equivalent iteration
+        assert_true(does_std_set_match_roaring());  // checks equivalent iteration
     }
 
     bool select(uint64_t rnk, uint64_t *element) const {
@@ -409,7 +411,7 @@ class Roaring64Map {
         auto it_end = check.end();
         for (uint64_t i = 0; it != it_end && i < rnk; ++i)
             ++it;
-        assert(ans == (it != it_end) && (ans ? *it == *element : true));
+        assert_true(ans == (it != it_end) && (ans ? *it == *element : true));
 
         return ans;
     }
@@ -422,7 +424,7 @@ class Roaring64Map {
         auto it_end = check.end();
         for (; it != it_end && *it <= x; ++it)
             ++count;
-        assert(ans == count);
+        assert_true(ans == count);
 
         return ans;
     }
@@ -449,7 +451,7 @@ class Roaring64Map {
         Roaring64Map ans(plain & o.plain);
 
         Roaring64Map inplace(*this);
-        assert(ans == (inplace &= o));  // validate against in-place version
+        assert_true(ans == (inplace &= o));  // validate against in-place version
 
         return ans;
     }
@@ -458,7 +460,7 @@ class Roaring64Map {
         Roaring64Map ans(plain - o.plain);
 
         Roaring64Map inplace(*this);
-        assert(ans == (inplace -= o));  // validate against in-place version
+        assert_true(ans == (inplace -= o));  // validate against in-place version
 
         return ans;
     }
@@ -467,7 +469,7 @@ class Roaring64Map {
         Roaring64Map ans(plain | o.plain);
 
         Roaring64Map inplace(*this);
-        assert(ans == (inplace |= o));  // validate against in-place version
+        assert_true(ans == (inplace |= o));  // validate against in-place version
 
         return ans;
     }
@@ -476,7 +478,7 @@ class Roaring64Map {
         Roaring64Map ans(plain ^ o.plain);
 
         Roaring64Map inplace(*this);
-        assert(ans == (inplace ^= o));  // validate against in-place version
+        assert_true(ans == (inplace ^= o));  // validate against in-place version
 
         return ans;
     }
@@ -505,12 +507,12 @@ class Roaring64Map {
         delete[] plain_inputs;
 
         if (n == 0)
-            assert(ans.cardinality() == 0);
+            assert_true(ans.cardinality() == 0);
         else {
             Roaring64Map temp = *inputs[0];
             for (size_t i = 1; i < n; ++i)
                 temp |= *inputs[i];
-            assert(temp == ans);
+            assert_true(temp == ans);
         }
 
         return ans;
diff --git a/tests/roaring_checked.hh b/tests/roaring_checked.hh
index b06320199..2eb7a2e5c 100644
--- a/tests/roaring_checked.hh
+++ b/tests/roaring_checked.hh
@@ -42,7 +42,7 @@
 #include <string>
 
 #include <set>  // sorted set, typically a red-black tree implementation
-#include <assert.h>
+#include "test.h"
 
 #define ROARING_CPP_NAMESPACE unchecked  // can't be overridden if global
 #include "roaring.hh"  // contains Roaring unchecked class
@@ -112,7 +112,7 @@ class Roaring {
     bool addChecked(uint32_t x) {
         bool ans = plain.addChecked(x);
         bool was_in_set = check.insert(x).second;  // insert -> pair<iter,bool>
-        assert(ans == was_in_set);
+        assert_true(ans == was_in_set);
         (void)was_in_set;  // unused besides assert
         return ans;
     }
@@ -145,7 +145,7 @@ class Roaring {
     bool removeChecked(uint32_t x) {
         bool ans = plain.removeChecked(x);
         size_t num_removed = check.erase(x);
-        assert(ans == (num_removed == 1));
+        assert_true(ans == (num_removed == 1));
         (void)num_removed;  // unused besides assert
         return ans;
     }
@@ -165,19 +165,19 @@ class Roaring {
 
     uint32_t maximum() const {
         uint32_t ans = plain.maximum();
-        assert(check.empty() ? ans == 0 : ans == *check.rbegin());
+        assert_true(check.empty() ? ans == 0 : ans == *check.rbegin());
         return ans;
     }
 
     uint32_t minimum() const {
         uint32_t ans = plain.minimum();
-        assert(check.empty() ? ans == UINT32_MAX : ans == *check.begin());
+        assert_true(check.empty() ? ans == UINT32_MAX : ans == *check.begin());
         return ans;
     }
 
     bool contains(uint32_t x) const {
         bool ans = plain.contains(x);
-        assert(ans == (check.find(x) != check.end()));
+        assert_true(ans == (check.find(x) != check.end()));
         return ans;
     }
 
@@ -186,14 +186,14 @@ class Roaring {
 
         auto it = check.find(x);
         if (x >= y)
-            assert(ans == true);  // roaring says true for this
+            assert_true(ans == true);  // roaring says true for this
         else if (it == check.end())
-            assert(ans == false);  // start of range not in set
+            assert_true(ans == false);  // start of range not in set
         else {
             uint64_t last = x;  // iterate up to y so long as values sequential
             while (++it != check.end() && last + 1 == *it && *it < y)
                 last = *it;
-            assert(ans == (last == y - 1));
+            assert_true(ans == (last == y - 1));
         }
 
         return ans;
@@ -217,7 +217,7 @@ class Roaring {
     }
 
     ~Roaring() {
-        assert(does_std_set_match_roaring());  // always check on destructor
+        assert_true(does_std_set_match_roaring());  // always check on destructor
     }
 
     Roaring &operator=(const Roaring &r) {
@@ -295,19 +295,19 @@ class Roaring {
 
     uint64_t cardinality() const {
         uint64_t ans = plain.cardinality();
-        assert(ans == check.size());
+        assert_true(ans == check.size());
         return ans;
     }
 
     bool isEmpty() const {
         bool ans = plain.isEmpty();
-        assert(ans == check.empty());
+        assert_true(ans == check.empty());
         return ans;
     }
 
     bool isSubset(const Roaring &r) const {  // is `this` subset of `r`?
         bool ans = plain.isSubset(r.plain);
-        assert(ans == std::includes(
+        assert_true(ans == std::includes(
             r.check.begin(), r.check.end(),  // containing range
             check.begin(), check.end()  // range to test for containment
         ));
@@ -316,7 +316,7 @@ class Roaring {
 
     bool isStrictSubset(const Roaring &r) const {  // is `this` subset of `r`?
         bool ans = plain.isStrictSubset(r.plain);
-        assert(ans == (std::includes(
+        assert_true(ans == (std::includes(
             r.check.begin(), r.check.end(),  // containing range
             check.begin(), check.end()  // range to test for containment
         ) && r.check.size() > check.size()));
@@ -335,7 +335,7 @@ class Roaring {
 
     bool operator==(const Roaring &r) const {
         bool ans = (plain == r.plain);
-        assert(ans == (check == r.check));
+        assert_true(ans == (check == r.check));
         return ans;
     }
 
@@ -370,7 +370,7 @@ class Roaring {
 
     void iterate(roaring::api::roaring_iterator iterator, void *ptr) const {
         plain.iterate(iterator, ptr);
-        assert(does_std_set_match_roaring());  // checks equivalent iteration
+        assert_true(does_std_set_match_roaring());  // checks equivalent iteration
     }
 
     bool select(uint32_t rnk, uint32_t *element) const {
@@ -380,7 +380,7 @@ class Roaring {
         auto it_end = check.end();
         for (uint32_t i = 0; it != it_end && i < rnk; ++i)
             ++it;
-        assert(ans == (it != it_end) && (ans ? *it == *element : true));
+        assert_true(ans == (it != it_end) && (ans ? *it == *element : true));
 
         return ans;
     }
@@ -393,9 +393,9 @@ class Roaring {
         auto r_it = r.check.begin();
         auto r_it_end = r.check.end();
         if (it == it_end || r_it == r_it_end) {
-            assert(ans == 0);  // if either is empty then no intersection
+            assert_true(ans == 0);  // if either is empty then no intersection
         } else if (*it > *r.check.rbegin() || *r_it > *check.rbegin()) {
-            assert(ans == 0);  // obvious disjoint
+            assert_true(ans == 0);  // obvious disjoint
         } else {  // may overlap
             uint64_t count = 0;
             while (it != it_end && r_it != r_it_end) {
@@ -403,7 +403,7 @@ class Roaring {
                 else if (*it < *r_it) { ++it; }
                 else { ++r_it; }
             }
-            assert(ans == count);
+            assert_true(ans == count);
         }
 
         return ans;
@@ -417,15 +417,15 @@ class Roaring {
         auto r_it = r.check.begin();
         auto r_it_end = r.check.end();
         if (it == it_end || r_it == r_it_end) {
-            assert(ans == false);  // if either are empty, no intersection
+            assert_true(ans == false);  // if either are empty, no intersection
         } else if (*it > *r.check.rbegin() || *r_it > *check.rbegin()) {
-            assert(ans == false);  // obvious disjoint
+            assert_true(ans == false);  // obvious disjoint
         } else while (it != it_end && r_it != r_it_end) {  // may overlap
-            if (*it == *r_it) { assert(ans == true); goto done; }  // overlap
+            if (*it == *r_it) { assert_true(ans == true); goto done; }  // overlap
             else if (*it < *r_it) { ++it; }
             else { ++r_it; }
         }
-        assert(ans == false);
+        assert_true(ans == false);
 
       done:  // (could use lambda vs goto, but debug step in lambdas is poor)
          return ans;
@@ -444,10 +444,10 @@ class Roaring {
         auto it_end = check.end();
         auto r_it = r.check.begin();
         auto r_it_end = r.check.end();
-        if (it == it_end) { assert(ans == r.check.size()); }  // this empty
-        else if (r_it == r_it_end) { assert(ans == check.size()); }  // r empty
+        if (it == it_end) { assert_true(ans == r.check.size()); }  // this empty
+        else if (r_it == r_it_end) { assert_true(ans == check.size()); }  // r empty
         else if (*it > *r.check.rbegin() || *r_it > *check.rbegin()) {
-            assert(ans == check.size() + r.check.size());  // obvious disjoint
+            assert_true(ans == check.size() + r.check.size());  // obvious disjoint
         } else {
             uint64_t count = 0;
             while (it != it_end || r_it != r_it_end) {
@@ -458,7 +458,7 @@ class Roaring {
                 else if (*it < *r_it) { ++it; }
                 else { ++r_it; }
             }
-            assert(ans == count);
+            assert_true(ans == count);
         }
 
         return ans;
@@ -471,10 +471,10 @@ class Roaring {
         auto it_end = check.end();
         auto r_it = r.check.begin();
         auto r_it_end = r.check.end();
-        if (it == it_end) { assert(ans == 0); }  // this empty
-        else if (r_it == r_it_end) { assert(ans == check.size()); }  // r empty
+        if (it == it_end) { assert_true(ans == 0); }  // this empty
+        else if (r_it == r_it_end) { assert_true(ans == check.size()); }  // r empty
         else if (*it > *r.check.rbegin() || *r_it > *check.rbegin()) {
-            assert(ans == check.size());  // disjoint so nothing removed
+            assert_true(ans == check.size());  // disjoint so nothing removed
         } else {  // may overlap
             uint64_t count = check.size();  // start with cardinality of this
             while (it != it_end && r_it != r_it_end) {
@@ -482,7 +482,7 @@ class Roaring {
                 else if (*it < *r_it) { ++it; }
                 else { ++r_it; }
             }
-            assert(ans == count);
+            assert_true(ans == count);
         }
 
         return ans;
@@ -495,10 +495,10 @@ class Roaring {
         auto it_end = check.end();
         auto r_it = r.check.begin();
         auto r_it_end = r.check.end();
-        if (it == it_end) { assert(ans == r.check.size()); }  // this empty
-        else if (r_it == r_it_end) { assert(ans == check.size()); }  // r empty
+        if (it == it_end) { assert_true(ans == r.check.size()); }  // this empty
+        else if (r_it == r_it_end) { assert_true(ans == check.size()); }  // r empty
         else if (*it > *r.check.rbegin() || *r_it > *check.rbegin()) {
-            assert(ans == check.size() + r.check.size());  // obvious disjoint
+            assert_true(ans == check.size() + r.check.size());  // obvious disjoint
         } else {  // may overlap
             uint64_t count = 0;
             while (it != it_end || r_it != r_it_end) {
@@ -508,7 +508,7 @@ class Roaring {
                 else if (*it < *r_it) { ++count; ++it; }
                 else { ++count; ++r_it; }
             }
-            assert(ans == count);
+            assert_true(ans == count);
         }
 
         return ans;
@@ -522,7 +522,7 @@ class Roaring {
         auto it_end = check.end();
         for (; it != it_end && *it <= x; ++it)
             ++count;
-        assert(ans == count);
+        assert_true(ans == count);
 
         return ans;
     }
@@ -549,7 +549,7 @@ class Roaring {
         Roaring ans(plain & o.plain);
 
         Roaring inplace(*this);
-        assert(ans == (inplace &= o));  // validate against in-place version
+        assert_true(ans == (inplace &= o));  // validate against in-place version
 
         return ans;
     }
@@ -558,7 +558,7 @@ class Roaring {
         Roaring ans(plain - o.plain);
 
         Roaring inplace(*this);
-        assert(ans == (inplace -= o));  // validate against in-place version
+        assert_true(ans == (inplace -= o));  // validate against in-place version
 
         return ans;
     }
@@ -567,7 +567,7 @@ class Roaring {
         Roaring ans(plain | o.plain);
 
         Roaring inplace(*this);
-        assert(ans == (inplace |= o));  // validate against in-place version
+        assert_true(ans == (inplace |= o));  // validate against in-place version
 
         return ans;
     }
@@ -576,7 +576,7 @@ class Roaring {
         Roaring ans(plain ^ o.plain);
 
         Roaring inplace(*this);
-        assert(ans == (inplace ^= o));  // validate against in-place version
+        assert_true(ans == (inplace ^= o));  // validate against in-place version
 
         return ans;
     }
@@ -605,12 +605,12 @@ class Roaring {
         delete[] plain_inputs;
 
         if (n == 0)
-            assert(ans.cardinality() == 0);
+            assert_true(ans.cardinality() == 0);
         else {
             Roaring temp = *inputs[0];
             for (size_t i = 1; i < n; ++i)
                 temp |= *inputs[i];
-            assert(temp == ans);
+            assert_true(temp == ans);
         }
 
         return ans;

From bc51a40aa276053fef531ff45f47c4f548f84986 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 5 Jan 2023 22:45:26 -0500
Subject: [PATCH 045/162] Updating version.

---
 CMakeLists.txt                    | 4 ++--
 include/roaring/roaring_version.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0c779b62a..4ff34ab82 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 0)
 set(PROJECT_VERSION_MINOR 8)
-set(PROJECT_VERSION_PATCH 0)
-set(ROARING_LIB_VERSION "0.8.0" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 1)
+set(ROARING_LIB_VERSION "0.8.1" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "6" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index ff5a9f5df..df6a1c90f 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.8.0"
+#define ROARING_VERSION "0.8.1"
 enum { 
     ROARING_VERSION_MAJOR = 0,
     ROARING_VERSION_MINOR = 8,
-    ROARING_VERSION_REVISION = 0
+    ROARING_VERSION_REVISION = 1
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 7c787c66db8baf344e8fb7e5acce68c938ac224a Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 5 Jan 2023 22:48:11 -0500
Subject: [PATCH 046/162] Minor fix

---
 include/roaring/portability.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index 5b0a942ee..239c056e9 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -232,7 +232,7 @@ inline int __builtin_clzll(unsigned long long input_num) {
 
 #ifdef USENEON
 // we can always compute the popcount fast.
-#elif (defined(_M_ARM) || defined(_M_ARM64)) && (defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO)
+#elif (defined(_M_ARM) || defined(_M_ARM64)) && ((defined(_WIN64) || defined(_WIN32)) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO)
 // we will need this function:
 static inline int hammingbackup(uint64_t x) {
   uint64_t c1 = UINT64_C(0x5555555555555555);

From a44caa798052b66b2c3286eaa0dfe34e9014583a Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 23 Jan 2023 23:13:42 -0500
Subject: [PATCH 047/162] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 80c01487b..cd86ddfeb 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# CRoaring [![Build status](https://ci.appveyor.com/api/projects/status/gr4ibsflqs9by1bc/branch/master?svg=true)](https://ci.appveyor.com/project/lemire/croaring/branch/master) [![Build Status](https://cloud.drone.io/api/badges/RoaringBitmap/CRoaring/status.svg)](https://cloud.drone.io/RoaringBitmap/CRoaring)
+# CRoaring [![Build status](https://ci.appveyor.com/api/projects/status/gr4ibsflqs9by1bc/branch/master?svg=true)](https://cloud.drone.io/api/badges/RoaringBitmap/CRoaring/status.svg)](https://cloud.drone.io/RoaringBitmap/CRoaring)
 
 Portable Roaring bitmaps in C (and C++) with full support for your favorite compiler (GNU GCC, LLVM's clang, Visual Studio). Included in the [Awesome C](https://github.com/kozross/awesome-c) list of open source C software.
 

From 66f1bb7b8f00e01930d33bfcc94a7a634df5555e Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 23 Jan 2023 23:14:38 -0500
Subject: [PATCH 048/162] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index cd86ddfeb..41aab9b00 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# CRoaring [![Build status](https://ci.appveyor.com/api/projects/status/gr4ibsflqs9by1bc/branch/master?svg=true)](https://cloud.drone.io/api/badges/RoaringBitmap/CRoaring/status.svg)](https://cloud.drone.io/RoaringBitmap/CRoaring)
+# CRoaring [![Ubuntu-CI](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml/badge.svg)](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml)
 
 Portable Roaring bitmaps in C (and C++) with full support for your favorite compiler (GNU GCC, LLVM's clang, Visual Studio). Included in the [Awesome C](https://github.com/kozross/awesome-c) list of open source C software.
 

From 3078dfafc22c9becf94bb3ab90956dedc39d82e1 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 24 Jan 2023 11:18:48 -0500
Subject: [PATCH 049/162] Adding Debug tests to VS.

---
 .github/workflows/vs17-ci.yml       | 8 +++++++-
 .github/workflows/vs17-clang-ci.yml | 8 +++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/vs17-ci.yml b/.github/workflows/vs17-ci.yml
index 8d6e0d65e..1cfc0b652 100644
--- a/.github/workflows/vs17-ci.yml
+++ b/.github/workflows/vs17-ci.yml
@@ -24,4 +24,10 @@ jobs:
       - name: Run basic tests
         run: |
           cd build
-          ctest -C Release --output-on-failure 
+          ctest -C Release --output-on-failure
+      - name: Build Debug
+        run: cmake --build build --config Debug
+      - name: Run basic tests in Debug
+        run: |
+          cd build
+          ctest -C Debug --output-on-failure
diff --git a/.github/workflows/vs17-clang-ci.yml b/.github/workflows/vs17-clang-ci.yml
index 0f258a4dc..9beb6d320 100644
--- a/.github/workflows/vs17-clang-ci.yml
+++ b/.github/workflows/vs17-clang-ci.yml
@@ -24,4 +24,10 @@ jobs:
       - name: Run basic tests
         run: |
           cd build
-          ctest -C Release --output-on-failure 
\ No newline at end of file
+          ctest -C Release --output-on-failure
+      - name: Build Debug
+        run: cmake --build build --config Debug --parallel
+      - name: Run basic tests in Debug
+        run: |
+          cd build
+          ctest -C Debug --output-on-failure 
\ No newline at end of file

From 5f73c5e8981a979a988cecd5c4a085fbd576081f Mon Sep 17 00:00:00 2001
From: Andreas Garnaes <andreas@users.noreply.github.com>
Date: Thu, 26 Jan 2023 04:25:23 +0100
Subject: [PATCH 050/162] Add roaring_bitmap_portable_deserialize_frozen (#421)

* Add roaring_bitmap_portable_deserialize_frozen

* Add deserialization to real_bitmaps_benchmark

* Fix cycle count variable types in real_bitmaps_benchmark

* Add comment on unaligned access

* Allow unaligned memory access for roaring_bitmap_portable_deserialize_frozen

* Add ALLOW_UNALIGNED
---
 benchmarks/real_bitmaps_benchmark.c |  40 +++++++
 include/roaring/containers/array.h  |   2 +
 include/roaring/containers/bitset.h |   1 +
 include/roaring/containers/run.h    |   1 +
 include/roaring/portability.h       |   6 ++
 include/roaring/roaring.h           |  18 ++++
 src/containers/array.c              |   1 +
 src/containers/bitset.c             |   3 +
 src/containers/run.c                |   2 +
 src/roaring.c                       | 155 ++++++++++++++++++++++++++++
 tests/toplevel_unit.c               |  94 +++++++++++++++++
 11 files changed, 323 insertions(+)

diff --git a/benchmarks/real_bitmaps_benchmark.c b/benchmarks/real_bitmaps_benchmark.c
index 87840a926..887c9df74 100644
--- a/benchmarks/real_bitmaps_benchmark.c
+++ b/benchmarks/real_bitmaps_benchmark.c
@@ -174,6 +174,46 @@ int main(int argc, char **argv) {
            " cycles\n",
            count, total_count, cycles_final - cycles_start);
 
+    uint64_t portable_cycle_count = 0, portable_frozen_cycle_count = 0,
+      frozen_cycle_count = 0;
+    for(int i = 0; i < (int)count; i++) {
+        int size = roaring_bitmap_portable_size_in_bytes(bitmaps[i]);
+        char *buf = malloc(size);
+        roaring_bitmap_portable_serialize(bitmaps[i], buf);
+
+        int frozen_size = roaring_bitmap_frozen_size_in_bytes(bitmaps[i]);
+        char *frozen_buf = roaring_aligned_malloc(32, frozen_size);
+        roaring_bitmap_frozen_serialize(bitmaps[i], frozen_buf);
+
+        RDTSC_START(cycles_start);
+        roaring_bitmap_t *r1 = roaring_bitmap_portable_deserialize(buf);
+        RDTSC_FINAL(cycles_final);
+        portable_cycle_count += cycles_final - cycles_start;
+
+        RDTSC_START(cycles_start);
+        roaring_bitmap_t *r2 = roaring_bitmap_portable_deserialize_frozen(buf);
+        RDTSC_FINAL(cycles_final);
+        portable_frozen_cycle_count += cycles_final - cycles_start;
+
+        RDTSC_START(cycles_start);
+        roaring_bitmap_t *r3 = roaring_bitmap_frozen_view(frozen_buf, frozen_size);
+        RDTSC_FINAL(cycles_final);
+        frozen_cycle_count += cycles_final - cycles_start;
+
+        roaring_bitmap_free(r1);
+        roaring_bitmap_free(r2);
+        roaring_bitmap_free(r3);
+        free(buf);
+        roaring_aligned_free(frozen_buf);
+    }
+
+    printf("Deserializing %zu bitmaps took %" PRIu64 " cycles for portable format\n",
+           count, portable_cycle_count);
+    printf("Deserializing %zu bitmaps took %" PRIu64 " cycles for portable frozen format\n",
+           count, portable_frozen_cycle_count);
+    printf("Deserializing %zu bitmaps took %" PRIu64 " cycles for frozen format\n",
+           count, frozen_cycle_count);
+
     for (int i = 0; i < (int)count; ++i) {
         free(numbers[i]);
         numbers[i] = NULL;  // paranoid
diff --git a/include/roaring/containers/array.h b/include/roaring/containers/array.h
index 3a3fe828a..071b0b25f 100644
--- a/include/roaring/containers/array.h
+++ b/include/roaring/containers/array.h
@@ -68,6 +68,7 @@ void array_container_free(array_container_t *array);
 array_container_t *array_container_clone(const array_container_t *src);
 
 /* Get the cardinality of `array'. */
+ALLOW_UNALIGNED
 static inline int array_container_cardinality(const array_container_t *array) {
     return array->cardinality;
 }
@@ -214,6 +215,7 @@ static inline int32_t array_container_size_in_bytes(
 /**
  * Return true if the two arrays have the same content.
  */
+ALLOW_UNALIGNED
 static inline bool array_container_equals(
     const array_container_t *container1,
     const array_container_t *container2) {
diff --git a/include/roaring/containers/bitset.h b/include/roaring/containers/bitset.h
index b8c136499..15767a23f 100644
--- a/include/roaring/containers/bitset.h
+++ b/include/roaring/containers/bitset.h
@@ -232,6 +232,7 @@ static inline bool bitset_container_contains_range(const bitset_container_t *bit
 }
 
 /* Get the number of bits set */
+ALLOW_UNALIGNED
 static inline int bitset_container_cardinality(
     const bitset_container_t *bitset) {
     return bitset->cardinality;
diff --git a/include/roaring/containers/run.h b/include/roaring/containers/run.h
index 4b01d5987..673fde8a0 100644
--- a/include/roaring/containers/run.h
+++ b/include/roaring/containers/run.h
@@ -481,6 +481,7 @@ static inline int32_t run_container_size_in_bytes(
 /**
  * Return true if the two containers have the same content.
  */
+ALLOW_UNALIGNED
 static inline bool run_container_equals(const run_container_t *container1,
                           const run_container_t *container2) {
     if (container1->n_runs != container2->n_runs) {
diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index 239c056e9..558cc7582 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -338,6 +338,12 @@ static inline int hamming(uint64_t x) {
 #define CROARING_UNTARGET_REGION
 #endif
 
+// Allow unaligned memory access
+#if defined(__GNUC__) || defined(__clang__)
+#define ALLOW_UNALIGNED __attribute__((no_sanitize("alignment")))
+#else
+#define ALLOW_UNALIGNED
+#endif
 
 // We need portability.h to be included first,
 // but we also always want isadetection.h to be
diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h
index 4283d9a3b..090d63aac 100644
--- a/include/roaring/roaring.h
+++ b/include/roaring/roaring.h
@@ -519,6 +519,24 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
 roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
                                                            size_t maxbytes);
 
+/**
+ * Read bitmap from a serialized buffer.
+ * In case of failure, NULL is returned.
+ *
+ * Bitmap returned by this function can be used in all readonly contexts.
+ * Bitmap must be freed as usual, by calling roaring_bitmap_free().
+ * Underlying buffer must not be freed or modified while it backs any bitmaps.
+ *
+ * The function is unsafe in the following ways:
+ * 1) It may execute unaligned memory accesses.
+ * 2) A buffer overflow may occur if buf does not point to a valid serialized
+ *    bitmap.
+ *
+ * This is meant to be compatible with the Java and Go versions:
+ * https://github.com/RoaringBitmap/RoaringFormatSpec
+ */
+roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf);
+
 /**
  * Check how many bytes would be read (up to maxbytes) at this pointer if there
  * is a bitmap, returns zero if there is no valid bitmap.
diff --git a/src/containers/array.c b/src/containers/array.c
index b4adc2de9..312f7c6a1 100644
--- a/src/containers/array.c
+++ b/src/containers/array.c
@@ -376,6 +376,7 @@ void array_container_intersection_inplace(array_container_t *src_1,
     }
 }
 
+ALLOW_UNALIGNED
 int array_container_to_uint32_array(void *vout, const array_container_t *cont,
                                     uint32_t base) {
     int outpos = 0;
diff --git a/src/containers/bitset.c b/src/containers/bitset.c
index 31421846a..25248e00e 100644
--- a/src/containers/bitset.c
+++ b/src/containers/bitset.c
@@ -686,6 +686,7 @@ BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
 // clang-format On
 
 
+ALLOW_UNALIGNED
 int bitset_container_to_uint32_array(
     uint32_t *out,
     const bitset_container_t *bc,
@@ -816,6 +817,7 @@ bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, r
 
 #ifdef CROARING_IS_X64
 CROARING_TARGET_AVX2
+ALLOW_UNALIGNED
 static inline bool _avx2_bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) {
     const __m256i *ptr1 = (const __m256i*)container1->words;
     const __m256i *ptr2 = (const __m256i*)container2->words;
@@ -832,6 +834,7 @@ static inline bool _avx2_bitset_container_equals(const bitset_container_t *conta
 CROARING_UNTARGET_REGION
 #endif // CROARING_IS_X64
 
+ALLOW_UNALIGNED
 bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) {
   if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) {
     if(container1->cardinality != container2->cardinality) {
diff --git a/src/containers/run.c b/src/containers/run.c
index ee8a4bcf3..bde8a5371 100644
--- a/src/containers/run.c
+++ b/src/containers/run.c
@@ -625,6 +625,7 @@ void run_container_andnot(const run_container_t *src_1,
     }
 }
 
+ALLOW_UNALIGNED
 int run_container_to_uint32_array(void *vout, const run_container_t *cont,
                                   uint32_t base) {
     int outpos = 0;
@@ -830,6 +831,7 @@ int run_container_rank(const run_container_t *container, uint16_t x) {
 #ifdef CROARING_IS_X64
 
 CROARING_TARGET_AVX2
+ALLOW_UNALIGNED
 /* Get the cardinality of `run'. Requires an actual computation. */
 static inline int _avx2_run_container_cardinality(const run_container_t *run) {
     const int32_t n_runs = run->n_runs;
diff --git a/src/roaring.c b/src/roaring.c
index 7479b4720..64291a639 100644
--- a/src/roaring.c
+++ b/src/roaring.c
@@ -3194,6 +3194,161 @@ roaring_bitmap_frozen_view(const char *buf, size_t length) {
     return rb;
 }
 
+ALLOW_UNALIGNED
+roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
+    char *start_of_buf = (char *) buf;
+    uint32_t cookie;
+    int32_t num_containers;
+    uint16_t *descriptive_headers;
+    uint32_t *offset_headers = NULL;
+    const char *run_flag_bitset = NULL;
+    bool hasrun = false;
+
+    // deserialize cookie
+    memcpy(&cookie, buf, sizeof(uint32_t));
+    buf += sizeof(uint32_t);
+    if (cookie == SERIAL_COOKIE_NO_RUNCONTAINER) {
+        memcpy(&num_containers, buf, sizeof(int32_t));
+        buf += sizeof(int32_t);
+        descriptive_headers = (uint16_t *) buf;
+        buf += num_containers * 2 * sizeof(uint16_t);
+        offset_headers = (uint32_t *) buf;
+        buf += num_containers * sizeof(uint32_t);
+    } else if ((cookie & 0xFFFF) == SERIAL_COOKIE) {
+        num_containers = (cookie >> 16) + 1;
+        hasrun = true;
+        int32_t run_flag_bitset_size = (num_containers + 7) / 8;
+        run_flag_bitset = buf;
+        buf += run_flag_bitset_size;
+        descriptive_headers = (uint16_t *) buf;
+        buf += num_containers * 2 * sizeof(uint16_t);
+        if(num_containers >= NO_OFFSET_THRESHOLD) {
+            offset_headers = (uint32_t *) buf;
+            buf += num_containers * sizeof(uint32_t);
+        }
+    } else {
+        return NULL;
+    }
+
+    // calculate total size for allocation
+    int32_t num_bitset_containers = 0;
+    int32_t num_run_containers = 0;
+    int32_t num_array_containers = 0;
+
+    for (int32_t i = 0; i < num_containers; i++) {
+        uint16_t tmp;
+        memcpy(&tmp, descriptive_headers + 2*i+1, sizeof(tmp));
+        uint32_t cardinality = tmp + 1;
+        bool isbitmap = (cardinality > DEFAULT_MAX_SIZE);
+        bool isrun = false;
+        if(hasrun) {
+          if((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) {
+            isbitmap = false;
+            isrun = true;
+          }
+        }
+
+        if (isbitmap) {
+            num_bitset_containers++;
+        } else if (isrun) {
+            num_run_containers++;
+        } else {
+            num_array_containers++;
+        }
+    }
+
+    size_t alloc_size = 0;
+    alloc_size += sizeof(roaring_bitmap_t);
+    alloc_size += num_containers * sizeof(container_t*);
+    alloc_size += num_bitset_containers * sizeof(bitset_container_t);
+    alloc_size += num_run_containers * sizeof(run_container_t);
+    alloc_size += num_array_containers * sizeof(array_container_t);
+    alloc_size += num_containers * sizeof(uint16_t); // keys
+    alloc_size += num_containers * sizeof(uint8_t); // typecodes
+
+    // allocate bitmap and construct containers
+    char *arena = (char *)roaring_malloc(alloc_size);
+    if (arena == NULL) {
+        return NULL;
+    }
+
+    roaring_bitmap_t *rb = (roaring_bitmap_t *)
+            arena_alloc(&arena, sizeof(roaring_bitmap_t));
+    rb->high_low_container.flags = ROARING_FLAG_FROZEN;
+    rb->high_low_container.allocation_size = num_containers;
+    rb->high_low_container.size = num_containers;
+    rb->high_low_container.containers =
+        (container_t **)arena_alloc(&arena,
+                                    sizeof(container_t*) * num_containers);
+
+    uint16_t *keys = arena_alloc(&arena, num_containers * sizeof(uint16_t));
+    uint8_t *typecodes = arena_alloc(&arena, num_containers * sizeof(uint8_t));
+
+    rb->high_low_container.keys = keys;
+    rb->high_low_container.typecodes = typecodes;
+
+    for (int32_t i = 0; i < num_containers; i++) {
+        uint16_t tmp;
+        memcpy(&tmp, descriptive_headers + 2*i+1, sizeof(tmp));
+        int32_t cardinality = tmp + 1;
+        bool isbitmap = (cardinality > DEFAULT_MAX_SIZE);
+        bool isrun = false;
+        if(hasrun) {
+          if((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) {
+            isbitmap = false;
+            isrun = true;
+          }
+        }
+
+        keys[i] = descriptive_headers[2*i];
+
+        if (isbitmap) {
+            typecodes[i] = BITSET_CONTAINER_TYPE;
+            bitset_container_t *c = arena_alloc(&arena, sizeof(bitset_container_t));
+            c->cardinality = cardinality;
+            if(offset_headers != NULL) {
+                c->words = (uint64_t *) (start_of_buf + offset_headers[i]);
+            } else {
+                c->words = (uint64_t *) buf;
+                buf += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+            }
+            rb->high_low_container.containers[i] = c;
+        } else if (isrun) {
+            typecodes[i] = RUN_CONTAINER_TYPE;
+            run_container_t *c = arena_alloc(&arena, sizeof(run_container_t));
+            c->capacity = cardinality;
+            uint16_t n_runs;
+            if(offset_headers != NULL) {
+                memcpy(&n_runs, start_of_buf + offset_headers[i], sizeof(uint16_t));
+                c->n_runs = n_runs;
+                c->runs = (rle16_t *) (start_of_buf + offset_headers[i] + sizeof(uint16_t));
+            } else {
+                memcpy(&n_runs, buf, sizeof(uint16_t));
+                c->n_runs = n_runs;
+                buf += sizeof(uint16_t);
+                c->runs = (rle16_t *) buf;
+                buf += c->n_runs * sizeof(rle16_t);
+            }
+            rb->high_low_container.containers[i] = c;
+        } else {
+            typecodes[i] = ARRAY_CONTAINER_TYPE;
+            array_container_t *c = arena_alloc(&arena, sizeof(array_container_t));
+            c->cardinality = cardinality;
+            c->capacity = cardinality;
+            if(offset_headers != NULL) {
+                c->array = (uint16_t *) (start_of_buf + offset_headers[i]);
+            } else {
+                c->array = (uint16_t *) buf;
+                buf += cardinality * sizeof(uint16_t);
+            }
+            rb->high_low_container.containers[i] = c;
+        }
+    }
+
+    return rb;
+}
+
+
 #ifdef __cplusplus
 } } }  // extern "C" { namespace roaring {
 #endif
diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index 896cf13b4..4f129870d 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -4245,6 +4245,99 @@ DEFINE_TEST(test_frozen_serialization_max_containers) {
     frozen_serialization_compare(r);
 }
 
+DEFINE_TEST(test_portable_deserialize_frozen) {
+    roaring_bitmap_t *r1 =
+        roaring_bitmap_of(8, 1, 2, 3, 100, 1000, 10000, 1000000, 20000000);
+    assert_non_null(r1);
+
+    uint32_t serialize_len;
+    roaring_bitmap_t *r2;
+
+    for (int i = 0, top_val = 384000; i < top_val; i++)
+        roaring_bitmap_add(r1, 3 * i);
+
+    uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
+    char *serialized = (char*)malloc(expectedsize);
+    serialize_len = roaring_bitmap_portable_serialize(r1, serialized);
+    assert_int_equal(serialize_len, expectedsize);
+    r2 = roaring_bitmap_portable_deserialize_frozen(serialized);
+    assert_non_null(r2);
+
+    uint64_t card1 = roaring_bitmap_get_cardinality(r1);
+    uint32_t *arr1 = (uint32_t *)malloc(card1 * sizeof(uint32_t));
+    roaring_bitmap_to_uint32_array(r1, arr1);
+
+    uint64_t card2 = roaring_bitmap_get_cardinality(r2);
+    uint32_t *arr2 = (uint32_t *)malloc(card2 * sizeof(uint32_t));
+    roaring_bitmap_to_uint32_array(r2, arr2);
+
+    assert_true(array_equals(arr1, card1, arr2, card2));
+    assert_true(roaring_bitmap_equals(r1, r2));
+    free(arr1);
+    free(arr2);
+    free(serialized);
+    roaring_bitmap_free(r1);
+    roaring_bitmap_free(r2);
+
+    r1 = roaring_bitmap_of(6, 2946000, 2997491, 10478289, 10490227, 10502444,
+                           19866827);
+    expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
+    serialized = (char*)malloc(expectedsize);
+    serialize_len = roaring_bitmap_portable_serialize(r1, serialized);
+    assert_int_equal(serialize_len, expectedsize);
+    assert_int_equal(serialize_len, expectedsize);
+
+    r2 = roaring_bitmap_portable_deserialize_frozen(serialized);
+    assert_non_null(r2);
+
+    card1 = roaring_bitmap_get_cardinality(r1);
+    arr1 = (uint32_t *)malloc(card1 * sizeof(uint32_t));
+    roaring_bitmap_to_uint32_array(r1, arr1);
+
+    card2 = roaring_bitmap_get_cardinality(r2);
+    arr2 = (uint32_t *)malloc(card2 * sizeof(uint32_t));
+    roaring_bitmap_to_uint32_array(r2, arr2);
+
+    assert_true(array_equals(arr1, card1, arr2, card2));
+    assert_true(roaring_bitmap_equals(r1, r2));
+    free(arr1);
+    free(arr2);
+    roaring_bitmap_free(r1);
+    roaring_bitmap_free(r2);
+    free(serialized);
+
+    r1 = roaring_bitmap_create();
+    assert_non_null(r1);
+
+    for (uint32_t k = 100; k < 100000; ++k) {
+        roaring_bitmap_add(r1, k);
+    }
+
+    roaring_bitmap_run_optimize(r1);
+    expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
+    serialized = (char*)malloc(expectedsize);
+    serialize_len = roaring_bitmap_portable_serialize(r1, serialized);
+    assert_int_equal(serialize_len, expectedsize);
+
+    r2 = roaring_bitmap_portable_deserialize_frozen(serialized);
+    assert_non_null(r2);
+
+    card1 = roaring_bitmap_get_cardinality(r1);
+    arr1 = (uint32_t *)malloc(card1 * sizeof(uint32_t));
+    roaring_bitmap_to_uint32_array(r1, arr1);
+
+    card2 = roaring_bitmap_get_cardinality(r2);
+    arr2 = (uint32_t *)malloc(card2 * sizeof(uint32_t));
+    roaring_bitmap_to_uint32_array(r2, arr2);
+
+    assert_true(array_equals(arr1, card1, arr2, card2));
+    assert_true(roaring_bitmap_equals(r1, r2));
+    free(arr1);
+    free(arr2);
+    roaring_bitmap_free(r1);
+    roaring_bitmap_free(r2);
+    free(serialized);
+}
 
 int main() {
     tellmeall();
@@ -4378,6 +4471,7 @@ int main() {
         cmocka_unit_test(test_range_cardinality),
         cmocka_unit_test(test_frozen_serialization),
         cmocka_unit_test(test_frozen_serialization_max_containers),
+        cmocka_unit_test(test_portable_deserialize_frozen),
     };
 
     return cmocka_run_group_tests(tests, NULL, NULL);

From 198cac911ef7c141075c719029720df4cab6d463 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Wed, 25 Jan 2023 22:35:57 -0500
Subject: [PATCH 051/162] Casting.

---
 tests/cpp_unit.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index 1ed6c60b2..139249c4b 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -1870,7 +1870,7 @@ DEFINE_TEST(test_cpp_remove_run_compression) {
 bool test64Deserialize(const std::string& filename) {
     std::ifstream in(TEST_DATA_DIR + filename, std::ios::binary);
     std::vector<char> buf1(std::istreambuf_iterator<char>(in), {});
-    printf("Reading %lu bytes\n", buf1.size());
+    printf("Reading %lu bytes\n", (unsigned long)buf1.size());
     Roaring64Map roaring;
 #if ROARING_EXCEPTIONS
     try {

From e69217bbd73ff96fa930c1768243a26055da892f Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Wed, 25 Jan 2023 22:37:12 -0500
Subject: [PATCH 052/162] Preparing release

---
 CMakeLists.txt                    | 8 ++++----
 include/roaring/roaring_version.h | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4ff34ab82..210825065 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,10 +17,10 @@ if(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSIO
 endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 0)
-set(PROJECT_VERSION_MINOR 8)
-set(PROJECT_VERSION_PATCH 1)
-set(ROARING_LIB_VERSION "0.8.1" CACHE STRING "Roaring library version")
-set(ROARING_LIB_SOVERSION "6" CACHE STRING "Roaring library soversion")
+set(PROJECT_VERSION_MINOR 9)
+set(PROJECT_VERSION_PATCH 0)
+set(ROARING_LIB_VERSION "0.9.0" CACHE STRING "Roaring library version")
+set(ROARING_LIB_SOVERSION "7" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
 if(NOT ROARING_EXCEPTIONS)
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index df6a1c90f..b7ad25e23 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.8.1"
+#define ROARING_VERSION "0.9.0"
 enum { 
     ROARING_VERSION_MAJOR = 0,
-    ROARING_VERSION_MINOR = 8,
-    ROARING_VERSION_REVISION = 1
+    ROARING_VERSION_MINOR = 9,
+    ROARING_VERSION_REVISION = 0
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 4d492a7f32e31fbfba1921458f88be8f98db0aa1 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Fri, 27 Jan 2023 19:13:26 -0500
Subject: [PATCH 053/162] Adding 390 runner (#422)

* Adding 390 runner

* Adding token

* Fixing branch

* Flushing out big-endian tests.
---
 .github/workflows/s390x.yml         | 30 +++++++++++++++++++++++++
 README.md                           |  5 +++++
 benchmarks/real_bitmaps_benchmark.c |  2 +-
 include/roaring/misc/configreport.h |  8 +++++--
 include/roaring/portability.h       | 31 ++++++++++++++++++++++++++
 include/roaring/roaring.h           | 34 ++++++++++++++++++++++++++++-
 src/roaring_array.c                 |  4 +++-
 tests/c_example1.c                  |  9 +++++++-
 tests/cpp_example2.cpp              |  5 ++++-
 tests/cpp_unit.cpp                  | 15 +++++++++++--
 tests/format_portability_unit.c     | 12 +++++++---
 tests/realdata_unit.c               |  5 +++++
 tests/robust_deserialization_unit.c |  6 ++++-
 tests/toplevel_unit.c               | 10 +++++++++
 14 files changed, 163 insertions(+), 13 deletions(-)
 create mode 100644 .github/workflows/s390x.yml

diff --git a/.github/workflows/s390x.yml b/.github/workflows/s390x.yml
new file mode 100644
index 000000000..d49858218
--- /dev/null
+++ b/.github/workflows/s390x.yml
@@ -0,0 +1,30 @@
+name: Ubuntu s390x (GCC 11)
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: uraimo/run-on-arch-action@v2
+        name: Test
+        id: runcmd
+        with:
+          arch: s390x
+          githubToken: ${{ github.token }}
+          distro: ubuntu_latest
+          install: |
+            apt-get update -q -y
+            apt-get install -y cmake make g++
+          run: |
+            cmake -DCMAKE_BUILD_TYPE=Release -B build
+            cmake --build build -j=2
+            ctest --output-on-failure --test-dir build
+
diff --git a/README.md b/README.md
index 41aab9b00..30488508a 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,11 @@ of the latest hardware. Roaring bitmaps are already available on a variety of pl
 - CMake (to contribute to the project, users can rely on amalgamation/unity builds if they do not wish to use CMake).
 - Under x64 systems, the library provides runtime dispatch so that optimized functions are called based on the detected CPU features. It works with GCC, clang (version 9 and up) and Visual Studio (2017 and up). Other systems (e.g., ARM) do not need runtime dispatch.
 
+Hardly anyone has access to an actual big-endian system. Nevertheless,
+We support big-endian systems such as IBM s390x through emulators---except for
+IO serialization which is only supported on little-endian systems (see [issue 423](https://github.com/RoaringBitmap/CRoaring/issues/423)).
+
+
 # Using as a CMake dependency
 
 If you like CMake, you can just a few lines in you `CMakeLists.txt` file to grab a `CRoaring` release. [See our demonstration for further details](https://github.com/RoaringBitmap/croaring_cmake_demo_single_file).
diff --git a/benchmarks/real_bitmaps_benchmark.c b/benchmarks/real_bitmaps_benchmark.c
index 887c9df74..7d8c4847e 100644
--- a/benchmarks/real_bitmaps_benchmark.c
+++ b/benchmarks/real_bitmaps_benchmark.c
@@ -196,7 +196,7 @@ int main(int argc, char **argv) {
         portable_frozen_cycle_count += cycles_final - cycles_start;
 
         RDTSC_START(cycles_start);
-        roaring_bitmap_t *r3 = roaring_bitmap_frozen_view(frozen_buf, frozen_size);
+        const roaring_bitmap_t *r3 = roaring_bitmap_frozen_view(frozen_buf, frozen_size);
         RDTSC_FINAL(cycles_final);
         frozen_cycle_count += cycles_final - cycles_start;
 
diff --git a/include/roaring/misc/configreport.h b/include/roaring/misc/configreport.h
index 05b214505..a6a64285e 100644
--- a/include/roaring/misc/configreport.h
+++ b/include/roaring/misc/configreport.h
@@ -11,7 +11,6 @@
 #include <stdio.h>
 
 #include <roaring/portability.h>
-
 #ifdef __cplusplus
 extern "C" { namespace roaring { namespace misc {
 #endif
@@ -118,8 +117,10 @@ static inline const char *guessprocessor() {
 }
 
 static inline void tellmeall() {
+#if CROARING_IS_BIG_ENDIAN
+    printf("big-endian system detected\n");
+#endif
     printf("x64 processor:  %s\t", guessprocessor());
-
 #ifdef __VERSION__
     printf(" compiler version: %s\t", __VERSION__);
 #endif
@@ -170,6 +171,9 @@ static inline void tellmeall() {
 #else
 
 static inline void tellmeall() {
+#if CROARING_IS_BIG_ENDIAN
+    printf("big-endian system detected\n");
+#endif
     printf("Non-X64  processor\n");
 #ifdef __arm__
     printf("ARM processor detected\n");
diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index 558cc7582..15577c84a 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -345,6 +345,37 @@ static inline int hamming(uint64_t x) {
 #define ALLOW_UNALIGNED
 #endif
 
+#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
+ #define CROARING_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ #elif defined(_WIN32)
+ #define CROARING_IS_BIG_ENDIAN 0
+ #else
+ #if defined(__APPLE__) || defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
+ #include <machine/endian.h>
+ #elif defined(sun) || defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__)
+ #include <sys/byteorder.h>
+ #else  // defined(__APPLE__) || defined(__FreeBSD__)
+
+ #ifdef __has_include
+ #if __has_include(<endian.h>)
+ #include <endian.h>
+ #endif //__has_include(<endian.h>)
+ #endif //__has_include
+
+ #endif // defined(__APPLE__) || defined(__FreeBSD__)
+
+
+ // If neither the compiler nor the headers above provide both macros, the
+ // byte order cannot be detected at compile time: assume little-endian.
+ #if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__)
+ #define CROARING_IS_BIG_ENDIAN 0
+ #elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ #define CROARING_IS_BIG_ENDIAN 0
+ #else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ #define CROARING_IS_BIG_ENDIAN 1
+ #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#endif
+
 // We need portability.h to be included first,
 // but we also always want isadetection.h to be
 // included (right after).
diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h
index 090d63aac..7b52da6d6 100644
--- a/include/roaring/roaring.h
+++ b/include/roaring/roaring.h
@@ -479,6 +479,10 @@ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r);
  * more space efficient than the portable form, e.g. when the data is sparse.
  *
  * Returns how many bytes written, should be `roaring_bitmap_size_in_bytes(r)`.
+ *
+ * This function is endian-sensitive: big-endian systems will not be able toCROARING_
+ * reading from big-endian systems, etc. We assume that you have a little-endian
+ * system.
  */
 size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);
 
@@ -486,7 +490,11 @@ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);
  * Use with `roaring_bitmap_serialize()`.
  *
  * (See `roaring_bitmap_portable_deserialize()` if you want a format that's
- * compatible with Java and Go implementations)
+ * compatible with Java and Go implementations).
+ *
+ * This function is endian-sensitive: big-endian systems will not be able toCROARING_
+ * reading from big-endian systems, etc. We assume that you have a little-endian
+ * system.
  */
 roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf);
 
@@ -506,6 +514,10 @@ size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r);
  *
  * This is meant to be compatible with the Java and Go versions:
  * https://github.com/RoaringBitmap/RoaringFormatSpec
+*
+ * This function is endian-sensitive: big-endian systems will not be able toCROARING_
+ * reading from big-endian systems, etc. We assume that you have a little-endian
+ * system.
  */
 roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
 
@@ -515,6 +527,10 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
  *
  * This is meant to be compatible with the Java and Go versions:
  * https://github.com/RoaringBitmap/RoaringFormatSpec
+ *
+ * This function is endian-sensitive: big-endian systems will not be able toCROARING_
+ * reading from big-endian systems, etc. We assume that you have a little-endian
+ * system.
  */
 roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
                                                            size_t maxbytes);
@@ -534,6 +550,10 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
  *
  * This is meant to be compatible with the Java and Go versions:
  * https://github.com/RoaringBitmap/RoaringFormatSpec
+ *
+ * This function is endian-sensitive: big-endian systems will not be able toCROARING_
+ * reading from big-endian systems, etc. We assume that you have a little-endian
+ * system.
  */
 roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf);
 
@@ -564,6 +584,10 @@ size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r);
  *
  * This is meant to be compatible with the Java and Go versions:
  * https://github.com/RoaringBitmap/RoaringFormatSpec
+ *
+ * This function is endian-sensitive: big-endian systems will not be able toCROARING_
+ * reading from big-endian systems, etc. We assume that you have a little-endian
+ * system.
  */
 size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf);
 
@@ -594,6 +618,10 @@ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r);
 /**
  * Serializes bitmap using frozen format.
  * Buffer size must be at least roaring_bitmap_frozen_size_in_bytes().
+ *
+ * This function is endian-sensitive: big-endian systems will not be able toCROARING_
+ * reading from big-endian systems, etc. We assume that you have a little-endian
+ * system.
  */
 void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf);
 
@@ -607,6 +635,10 @@ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf);
  * Bitmap returned by this function can be used in all readonly contexts.
  * Bitmap must be freed as usual, by calling roaring_bitmap_free().
  * Underlying buffer must not be freed or modified while it backs any bitmaps.
+ *
+ * This function is endian-sensitive: big-endian systems will not be able toCROARING_
+ * reading from big-endian systems, etc. We assume that you have a little-endian
+ * system.
  */
 const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf,
                                                    size_t length);
diff --git a/src/roaring_array.c b/src/roaring_array.c
index 5151e7062..bfcf6f7b2 100644
--- a/src/roaring_array.c
+++ b/src/roaring_array.c
@@ -542,6 +542,7 @@ size_t ra_portable_size_in_bytes(const roaring_array_t *ra) {
     return count;
 }
 
+// This function is endian-sensitive: big-endian systems will not be able to reading from big-endian systems, etc.
 size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) {
     char *initbuf = buf;
     uint32_t startOffset = 0;
@@ -690,10 +691,11 @@ size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) {
     return bytestotal;
 }
 
-
 // this function populates answer from the content of buf (reading up to maxbytes bytes).
 // The function returns false if a properly serialized bitmap cannot be found.
 // if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes.
+//
+// This function is endian-sensitive: big-endian systems will not be able to reading from big-endian systems, etc.
 bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) {
     *readbytes = sizeof(int32_t);// for cookie
     if(*readbytes > maxbytes) {
diff --git a/tests/c_example1.c b/tests/c_example1.c
index a0d3480af..0765c1b22 100644
--- a/tests/c_example1.c
+++ b/tests/c_example1.c
@@ -1,4 +1,5 @@
 #include <roaring/roaring.h>
+#include <roaring/misc/configreport.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
@@ -10,6 +11,7 @@ bool roaring_iterator_sumall(uint32_t value, void *param) {
 }
 
 int main() {
+    tellmeall();
     // create a new empty bitmap
     roaring_bitmap_t *r1 = roaring_bitmap_create();
     // then we can add values
@@ -27,7 +29,11 @@ int main() {
     uint32_t expectedsizerun = roaring_bitmap_portable_size_in_bytes(r1);
     printf("size before run optimize %d bytes, and after %d bytes\n",
            expectedsizebasic, expectedsizerun);
-
+#if CROARING_IS_BIG_ENDIAN
+    printf("we omit serialization tests because you have a big endian system.\n");
+    roaring_bitmap_free(r1);
+    return EXIT_SUCCESS;
+#else
     // create a new bitmap containing the values {1,2,3,5,6}
     roaring_bitmap_t *r2 = roaring_bitmap_of(5, 1, 2, 3, 5, 6);
     roaring_bitmap_printf(r2);  // print it
@@ -137,4 +143,5 @@ int main() {
     roaring_bitmap_free(r2);
     roaring_bitmap_free(r3);
     return EXIT_SUCCESS;
+#endif
 }
\ No newline at end of file
diff --git a/tests/cpp_example2.cpp b/tests/cpp_example2.cpp
index bea031345..8e2b3cff6 100644
--- a/tests/cpp_example2.cpp
+++ b/tests/cpp_example2.cpp
@@ -66,6 +66,9 @@ int main() {
     // we can compute intersection two-by-two
     Roaring i1_2 = r1 & r2;
 
+#if CROARING_IS_BIG_ENDIAN
+    printf("We omit serialization tests because you have a big endian system.\n");
+#else
     // we can write a bitmap to a pointer and recover it later
     uint32_t expectedsize = r1.getSizeInBytes();
     char *serializedbytes = new char[expectedsize];
@@ -89,7 +92,7 @@ int main() {
         ++counter;
     }
     // counter == t.cardinality()
-
+#endif
     // we can move iterators to skip values
     const uint32_t manyvalues[] = {2, 3, 4, 7, 8};
     Roaring rogue(5, manyvalues);
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index 139249c4b..b1b50ac58 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -673,9 +673,11 @@ DEFINE_TEST(test_example_cpp_true) { test_example_cpp(true); }
 
 DEFINE_TEST(test_example_cpp_false) { test_example_cpp(false); }
 
+#if !CROARING_IS_BIG_ENDIAN
 DEFINE_TEST(test_example_cpp_64_true) { test_example_cpp_64(true); }
 
 DEFINE_TEST(test_example_cpp_64_false) { test_example_cpp_64(false); }
+#endif
 
 DEFINE_TEST(test_run_compression_cpp_64_true) {
     test_run_compression_cpp_64(true);
@@ -1868,6 +1870,10 @@ DEFINE_TEST(test_cpp_remove_run_compression) {
 
 // Returns true on success, false on exception.
 bool test64Deserialize(const std::string& filename) {
+#if CROARING_IS_BIG_ENDIAN
+    (void)filename;
+    printf("Big-endian IO unsupported.\n");
+#else // CROARING_IS_BIG_ENDIAN
     std::ifstream in(TEST_DATA_DIR + filename, std::ios::binary);
     std::vector<char> buf1(std::istreambuf_iterator<char>(in), {});
     printf("Reading %lu bytes\n", (unsigned long)buf1.size());
@@ -1878,15 +1884,16 @@ bool test64Deserialize(const std::string& filename) {
     } catch (...) {
         return false;
     }
-#else
+#else // ROARING_EXCEPTIONS
     roaring = Roaring64Map::readSafe(buf1.data(), buf1.size());
-#endif
+#endif // ROARING_EXCEPTIONS
     std::vector<char> buf2(roaring.getSizeInBytes());
     assert_true(buf1.size() == buf2.size());
     assert_true(roaring.write(buf2.data()) == buf2.size());
     for (size_t i = 0; i < buf1.size(); ++i) {
         assert_true(buf1[i] == buf2[i]);
     }
+#endif // CROARING_IS_BIG_ENDIAN
     return true;
 }
 
@@ -1940,12 +1947,14 @@ int main() {
         cmocka_unit_test(test_bitmap_of_32),
         cmocka_unit_test(test_bitmap_of_64),
         cmocka_unit_test(serial_test),
+#if !CROARING_IS_BIG_ENDIAN
         cmocka_unit_test(test_example_true),
         cmocka_unit_test(test_example_false),
         cmocka_unit_test(test_example_cpp_true),
         cmocka_unit_test(test_example_cpp_false),
         cmocka_unit_test(test_example_cpp_64_true),
         cmocka_unit_test(test_example_cpp_64_false),
+#endif
         cmocka_unit_test(test_cpp_add_remove_checked),
         cmocka_unit_test(test_cpp_add_remove_checked_64),
         cmocka_unit_test(test_cpp_add_range),
@@ -1980,6 +1989,7 @@ int main() {
         cmocka_unit_test(test_cpp_flip_64),
         cmocka_unit_test(test_cpp_flip_closed_64),
         cmocka_unit_test(test_combinatoric_flip_many_64),
+#if !CROARING_IS_BIG_ENDIAN
         cmocka_unit_test(test_cpp_deserialize_64_empty),
         cmocka_unit_test(test_cpp_deserialize_64_32bit_vals),
         cmocka_unit_test(test_cpp_deserialize_64_spread_vals),
@@ -1990,6 +2000,7 @@ int main() {
         cmocka_unit_test(test_cpp_deserialize_64_invalid_size),
         cmocka_unit_test(test_cpp_deserialize_64_key_too_small),
 #endif
+#endif // !CROARING_IS_BIG_ENDIAN
         cmocka_unit_test(issue316),
         cmocka_unit_test(test_issue304),
         cmocka_unit_test(issue_336),
diff --git a/tests/format_portability_unit.c b/tests/format_portability_unit.c
index a08584688..a823a9b64 100644
--- a/tests/format_portability_unit.c
+++ b/tests/format_portability_unit.c
@@ -74,7 +74,9 @@ void test_deserialize(char* filename) {
     free(input_buffer);
     roaring_bitmap_free(bitmap);
 }
-
+#if CROARING_IS_BIG_ENDIAN
+// port the test below.
+#else
 DEFINE_TEST(test_deserialize_portable_norun) {
     char filename[1024];
 
@@ -92,14 +94,18 @@ DEFINE_TEST(test_deserialize_portable_wrun) {
 
     test_deserialize(filename);
 }
+#endif
 
 int main() {
     tellmeall();
-
+#if CROARING_IS_BIG_ENDIAN
+    printf("Big-endian IO unsupported.\n");
+    return EXIT_SUCCESS;
+#else
     const struct CMUnitTest tests[] = {
         cmocka_unit_test(test_deserialize_portable_norun),
         cmocka_unit_test(test_deserialize_portable_wrun),
     };
-
     return cmocka_run_group_tests(tests, NULL, NULL);
+#endif 
 }
diff --git a/tests/realdata_unit.c b/tests/realdata_unit.c
index 1d37a3d18..e6e1c7388 100644
--- a/tests/realdata_unit.c
+++ b/tests/realdata_unit.c
@@ -44,6 +44,10 @@ const char *datadir[] = {
     "weather_sept_85_srt", "wikileaks-noquotes", "wikileaks-noquotes_srt"};
 
 bool serialize_correctly(roaring_bitmap_t *r) {
+#if CROARING_IS_BIG_ENDIAN
+    (void)r;
+    return r;
+#else
     uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(r);
     char *serialized = (char*)malloc(expectedsize);
     if (serialized == NULL) {
@@ -70,6 +74,7 @@ bool serialize_correctly(roaring_bitmap_t *r) {
     }
     roaring_bitmap_free(r2);
     return true;
+#endif
 }
 
 // arrays expected to both be sorted.
diff --git a/tests/robust_deserialization_unit.c b/tests/robust_deserialization_unit.c
index ee6750296..24467a99b 100644
--- a/tests/robust_deserialization_unit.c
+++ b/tests/robust_deserialization_unit.c
@@ -165,7 +165,10 @@ DEFINE_TEST(test_robust_deserialize7) {
 
 int main() {
     tellmeall();
-
+#if CROARING_IS_BIG_ENDIAN
+    printf("Big-endian IO unsupported.\n");
+    return EXIT_SUCCESS;
+#else
     const struct CMUnitTest tests[] = {
         cmocka_unit_test(test_robust_deserialize1),
         cmocka_unit_test(test_robust_deserialize2),
@@ -177,4 +180,5 @@ int main() {
      };
 
     return cmocka_run_group_tests(tests, NULL, NULL);
+#endif
 }
diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index 4f129870d..1a1e8ab17 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -163,6 +163,7 @@ bool check_serialization(roaring_bitmap_t *bitmap) {
     return ret;
 }
 
+#if !CROARING_IS_BIG_ENDIAN
 DEFINE_TEST(issue245) {
     roaring_bitmap_t *bitmap = roaring_bitmap_create();
     const uint32_t targetEntries = 2048;
@@ -191,6 +192,7 @@ DEFINE_TEST(issue245) {
     }
     roaring_bitmap_free(bitmap);
 }
+#endif
 
 DEFINE_TEST(issue208) {
     roaring_bitmap_t *r = roaring_bitmap_create();
@@ -4346,7 +4348,9 @@ int main() {
         cmocka_unit_test(test_contains_range_PyRoaringBitMap_issue81),
         cmocka_unit_test(issue316),
         cmocka_unit_test(issue288),
+#if !CROARING_IS_BIG_ENDIAN
         cmocka_unit_test(issue245),
+#endif
         cmocka_unit_test(issue208),
         cmocka_unit_test(issue208b),
         cmocka_unit_test(range_contains),
@@ -4364,8 +4368,10 @@ int main() {
         cmocka_unit_test(test_stress_memory_false),
         cmocka_unit_test(check_interval),
         cmocka_unit_test(test_uint32_iterator_true),
+#if !CROARING_IS_BIG_ENDIAN
         cmocka_unit_test(test_example_true),
         cmocka_unit_test(test_example_false),
+#endif
         cmocka_unit_test(test_clear),
         cmocka_unit_test(can_copy_empty_true),
         cmocka_unit_test(can_copy_empty_false),
@@ -4395,8 +4401,10 @@ int main() {
         cmocka_unit_test(test_iterate_empty),
         cmocka_unit_test(test_iterate_withbitmap),
         cmocka_unit_test(test_iterate_withrun),
+#if !CROARING_IS_BIG_ENDIAN
         cmocka_unit_test(test_serialize),
         cmocka_unit_test(test_portable_serialize),
+#endif
         cmocka_unit_test(test_add),
         cmocka_unit_test(test_add_checked),
         cmocka_unit_test(test_remove_checked),
@@ -4469,9 +4477,11 @@ int main() {
         cmocka_unit_test(test_remove_range),
         cmocka_unit_test(test_remove_many),
         cmocka_unit_test(test_range_cardinality),
+#if !CROARING_IS_BIG_ENDIAN
         cmocka_unit_test(test_frozen_serialization),
         cmocka_unit_test(test_frozen_serialization_max_containers),
         cmocka_unit_test(test_portable_deserialize_frozen),
+#endif
     };
 
     return cmocka_run_group_tests(tests, NULL, NULL);

From 407467cc1eee278950b1b507223a0651991666cf Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Sat, 28 Jan 2023 20:46:38 -0500
Subject: [PATCH 054/162] Dlemire/390 (#424)

* Adding 390 runner

* Adding token

* Fixing branch

* Flushing out big-endian tests.

* Minor fixes.
---
 src/containers/run.c | 7 +++++--
 tests/c_example1.c   | 9 ++-------
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/containers/run.c b/src/containers/run.c
index bde8a5371..a7e4ab0b7 100644
--- a/src/containers/run.c
+++ b/src/containers/run.c
@@ -675,7 +675,8 @@ void run_container_printf_as_uint32_array(const run_container_t *cont,
 }
 
 int32_t run_container_write(const run_container_t *container, char *buf) {
-    memcpy(buf, &container->n_runs, sizeof(uint16_t));
+    uint16_t cast_16 = container->n_runs;
+    memcpy(buf, &cast_16, sizeof(uint16_t));
     memcpy(buf + sizeof(uint16_t), container->runs,
            container->n_runs * sizeof(rle16_t));
     return run_container_size_in_bytes(container);
@@ -684,7 +685,9 @@ int32_t run_container_write(const run_container_t *container, char *buf) {
 int32_t run_container_read(int32_t cardinality, run_container_t *container,
                            const char *buf) {
     (void)cardinality;
-    memcpy(&container->n_runs, buf, sizeof(uint16_t));
+    uint16_t cast_16;
+    memcpy(&cast_16, buf, sizeof(uint16_t));
+    container->n_runs = cast_16;
     if (container->n_runs > container->capacity)
         run_container_grow(container, container->n_runs, false);
     if(container->n_runs > 0) {
diff --git a/tests/c_example1.c b/tests/c_example1.c
index 0765c1b22..7e92065f2 100644
--- a/tests/c_example1.c
+++ b/tests/c_example1.c
@@ -29,11 +29,6 @@ int main() {
     uint32_t expectedsizerun = roaring_bitmap_portable_size_in_bytes(r1);
     printf("size before run optimize %d bytes, and after %d bytes\n",
            expectedsizebasic, expectedsizerun);
-#if CROARING_IS_BIG_ENDIAN
-    printf("we omit serialization tests because you have a big endian system.\n");
-    roaring_bitmap_free(r1);
-    return EXIT_SUCCESS;
-#else
     // create a new bitmap containing the values {1,2,3,5,6}
     roaring_bitmap_t *r2 = roaring_bitmap_of(5, 1, 2, 3, 5, 6);
     roaring_bitmap_printf(r2);  // print it
@@ -98,7 +93,7 @@ int main() {
     // reading it
     size_t sizeofbitmap =
         roaring_bitmap_portable_deserialize_size(serializedbytes, expectedsize);
-    printf("sizeofbitmap = %zu \n", sizeofbitmap);
+    printf("\nsizeofbitmap = %zu \n", sizeofbitmap);
     assert_true(sizeofbitmap ==
            expectedsize);  // sizeofbitmap would be zero if no bitmap were found
     // we can also read the bitmap "safely" by specifying a byte size limit:
@@ -142,6 +137,6 @@ int main() {
     roaring_bitmap_free(r1);
     roaring_bitmap_free(r2);
     roaring_bitmap_free(r3);
+    printf("Success.\n");
     return EXIT_SUCCESS;
-#endif
 }
\ No newline at end of file

From 51519e5f4115b90f46d13754e6ebc66c9f7a3b69 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 30 Jan 2023 08:48:39 -0500
Subject: [PATCH 055/162] Correcting bad comments...

---
 include/roaring/roaring.h | 40 ++++++++++++++++-----------------------
 src/roaring_array.c       |  4 ++--
 2 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h
index 7b52da6d6..e4a732136 100644
--- a/include/roaring/roaring.h
+++ b/include/roaring/roaring.h
@@ -480,9 +480,8 @@ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r);
  *
  * Returns how many bytes written, should be `roaring_bitmap_size_in_bytes(r)`.
  *
- * This function is endian-sensitive: big-endian systems will not be able toCROARING_
- * reading from big-endian systems, etc. We assume that you have a little-endian
- * system.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
  */
 size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);
 
@@ -492,9 +491,8 @@ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);
  * (See `roaring_bitmap_portable_deserialize()` if you want a format that's
  * compatible with Java and Go implementations).
  *
- * This function is endian-sensitive: big-endian systems will not be able toCROARING_
- * reading from big-endian systems, etc. We assume that you have a little-endian
- * system.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
  */
 roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf);
 
@@ -515,9 +513,8 @@ size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r);
  * This is meant to be compatible with the Java and Go versions:
  * https://github.com/RoaringBitmap/RoaringFormatSpec
 *
- * This function is endian-sensitive: big-endian systems will not be able toCROARING_
- * reading from big-endian systems, etc. We assume that you have a little-endian
- * system.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
  */
 roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
 
@@ -528,9 +525,8 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
  * This is meant to be compatible with the Java and Go versions:
  * https://github.com/RoaringBitmap/RoaringFormatSpec
  *
- * This function is endian-sensitive: big-endian systems will not be able toCROARING_
- * reading from big-endian systems, etc. We assume that you have a little-endian
- * system.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
  */
 roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
                                                            size_t maxbytes);
@@ -551,9 +547,8 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
  * This is meant to be compatible with the Java and Go versions:
  * https://github.com/RoaringBitmap/RoaringFormatSpec
  *
- * This function is endian-sensitive: big-endian systems will not be able toCROARING_
- * reading from big-endian systems, etc. We assume that you have a little-endian
- * system.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
  */
 roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf);
 
@@ -585,9 +580,8 @@ size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r);
  * This is meant to be compatible with the Java and Go versions:
  * https://github.com/RoaringBitmap/RoaringFormatSpec
  *
- * This function is endian-sensitive: big-endian systems will not be able toCROARING_
- * reading from big-endian systems, etc. We assume that you have a little-endian
- * system.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
  */
 size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf);
 
@@ -619,9 +613,8 @@ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r);
  * Serializes bitmap using frozen format.
  * Buffer size must be at least roaring_bitmap_frozen_size_in_bytes().
  *
- * This function is endian-sensitive: big-endian systems will not be able toCROARING_
- * reading from big-endian systems, etc. We assume that you have a little-endian
- * system.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
  */
 void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf);
 
@@ -636,9 +629,8 @@ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf);
  * Bitmap must be freed as usual, by calling roaring_bitmap_free().
  * Underlying buffer must not be freed or modified while it backs any bitmaps.
  *
- * This function is endian-sensitive: big-endian systems will not be able toCROARING_
- * reading from big-endian systems, etc. We assume that you have a little-endian
- * system.
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
  */
 const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf,
                                                    size_t length);
diff --git a/src/roaring_array.c b/src/roaring_array.c
index bfcf6f7b2..bff88f39b 100644
--- a/src/roaring_array.c
+++ b/src/roaring_array.c
@@ -542,7 +542,7 @@ size_t ra_portable_size_in_bytes(const roaring_array_t *ra) {
     return count;
 }
 
-// This function is endian-sensitive: big-endian systems will not be able to reading from big-endian systems, etc.
+// This function is endian-sensitive.
 size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) {
     char *initbuf = buf;
     uint32_t startOffset = 0;
@@ -695,7 +695,7 @@ size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) {
 // The function returns false if a properly serialized bitmap cannot be found.
 // if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes.
 //
-// This function is endian-sensitive: big-endian systems will not be able to reading from big-endian systems, etc.
+// This function is endian-sensitive.
 bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) {
     *readbytes = sizeof(int32_t);// for cookie
     if(*readbytes > maxbytes) {

From 9a9c3e979b19c3fbd7b8ab89589be6636c0fc5c6 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 30 Jan 2023 13:23:40 -0500
Subject: [PATCH 056/162] add doxygen documentation for the C++ code...
 automatically... (#425)

* Preparing an automated doxygen run.

* Minor update.
---
 .github/workflows/documentation.yml |   34 +
 doxygen                             | 2737 +++++++++++++++++++++++++++
 tools/prepare_doxygen.sh            |   22 +
 tools/release.py                    |    5 +
 4 files changed, 2798 insertions(+)
 create mode 100644 .github/workflows/documentation.yml
 create mode 100644 doxygen
 create mode 100755 tools/prepare_doxygen.sh

diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
new file mode 100644
index 000000000..6d59fa4c8
--- /dev/null
+++ b/.github/workflows/documentation.yml
@@ -0,0 +1,34 @@
+name: Doxygen GitHub Pages
+
+on:
+  push:
+    branches:
+      - main
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+permissions:
+  contents: write
+  pages: write
+  id-token: write
+
+jobs:
+  deploy:
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Install Doxygen
+        run: sudo apt-get install doxygen graphviz -y
+      - run: mkdir docs
+      - name: Install theme
+        run: ./tools/prepare_doxygen.sh
+      - name: Generate Doxygen Documentation
+        run: doxygen ./doxygen
+      - name: Deploy to GitHub Pages
+        uses: peaceiris/actions-gh-pages@v3
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: docs/html
\ No newline at end of file
diff --git a/doxygen b/doxygen
new file mode 100644
index 000000000..768017761
--- /dev/null
+++ b/doxygen
@@ -0,0 +1,2737 @@
+# Doxyfile 1.9.6
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a double hash (##) is considered a comment and is placed in
+# front of the TAG it is preceding.
+#
+# All text after a single hash (#) is considered a comment and will be ignored.
+# The format is:
+# TAG = value [value, ...]
+# For lists, items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (\" \").
+#
+# Note:
+#
+# Use doxygen to compare the used configuration file with the template
+# configuration file:
+# doxygen -x [configFile]
+# Use doxygen to compare the used configuration file with the template
+# configuration file without replacing the environment variables or CMake type
+# replacement variables:
+# doxygen -x_noenv [configFile]
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the configuration
+# file that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# https://www.gnu.org/software/libiconv/ for the list of possible encodings.
+# The default value is: UTF-8.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by
+# double-quotes, unless you are using Doxywizard) that should identify the
+# project for which the documentation is generated. This name is used in the
+# title of most generated pages and in a few other places.
+# The default value is: My Project.
+
+PROJECT_NAME           = "CRoaring"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
+# could be handy for archiving the generated documentation or if some version
+# control system is used.
+
+PROJECT_NUMBER         = "0.9.0"
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer a
+# quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF          = "Roaring bitmaps in C (and C++)"
+
+# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
+# in the documentation. The maximum height of the logo should not exceed 55
+# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy
+# the logo to the output directory.
+
+PROJECT_LOGO           =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path
+# into which the generated documentation will be written. If a relative path is
+# entered, it will be relative to the location where doxygen was started. If
+# left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = "docs"
+
+# If the CREATE_SUBDIRS tag is set to YES then doxygen will create up to 4096
+# sub-directories (in 2 levels) under the output directory of each output format
+# and will distribute the generated files over these directories. Enabling this
+# option can be useful when feeding doxygen a huge amount of source files, where
+# putting all generated files in the same directory would otherwise cause
+# performance problems for the file system. Adapt CREATE_SUBDIRS_LEVEL to
+# control the number of sub-directories.
+# The default value is: NO.
+
+CREATE_SUBDIRS         = YES
+
+# Controls the number of sub-directories that will be created when
+# CREATE_SUBDIRS tag is set to YES. Level 0 represents 16 directories, and every
+# level increment doubles the number of directories, resulting in 4096
+# directories at level 8 which is the default and also the maximum value. The
+# sub-directories are organized in 2 levels, the first level always has a fixed
+# number of 16 directories.
+# Minimum value: 0, maximum value: 8, default value: 8.
+# This tag requires that the tag CREATE_SUBDIRS is set to YES.
+
+CREATE_SUBDIRS_LEVEL   = 8
+
+# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII
+# characters to appear in the names of generated files. If set to NO, non-ASCII
+# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode
+# U+3044.
+# The default value is: NO.
+
+ALLOW_UNICODE_NAMES    = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Bulgarian,
+# Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, English
+# (United States), Esperanto, Farsi (Persian), Finnish, French, German, Greek,
+# Hindi, Hungarian, Indonesian, Italian, Japanese, Japanese-en (Japanese with
+# English messages), Korean, Korean-en (Korean with English messages), Latvian,
+# Lithuanian, Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese,
+# Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish,
+# Swedish, Turkish, Ukrainian and Vietnamese.
+# The default value is: English.
+
+OUTPUT_LANGUAGE        = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
+# descriptions after the members that are listed in the file and class
+# documentation (similar to Javadoc). Set to NO to disable this.
+# The default value is: YES.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief
+# description of a member or function before the detailed description
+#
+# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+# The default value is: YES.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator that is
+# used to form the text in various listings. Each string in this list, if found
+# as the leading text of the brief description, will be stripped from the text
+# and the result, after processing the whole list, is used as the annotated
+# text. Otherwise, the brief description is used as-is. If left blank, the
+# following values are used ($name is automatically replaced with the name of
+# the entity):The $name class, The $name widget, The $name file, is, provides,
+# specifies, contains, represents, a, an and the.
+
+ABBREVIATE_BRIEF       = "The $name class" \
+                         "The $name widget" \
+                         "The $name file" \
+                         is \
+                         provides \
+                         specifies \
+                         contains \
+                         represents \
+                         a \
+                         an \
+                         the
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# doxygen will generate a detailed section even if there is only a brief
+# description.
+# The default value is: NO.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+# The default value is: NO.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path
+# before files name in the file list and in the header files. If set to NO the
+# shortest path that makes the file name unique will be used
+# The default value is: YES.
+
+FULL_PATH_NAMES        = YES
+
+# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
+# Stripping is only done if one of the specified strings matches the left-hand
+# part of the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the path to
+# strip.
+#
+# Note that you can specify absolute paths here, but also relative paths, which
+# will be relative from the directory where doxygen is started.
+# This tag requires that the tag FULL_PATH_NAMES is set to YES.
+
+STRIP_FROM_PATH        =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
+# path mentioned in the documentation of a class, which tells the reader which
+# header file to include in order to use a class. If left blank only the name of
+# the header file containing the class definition is used. Otherwise one should
+# specify the list of include paths that are normally passed to the compiler
+# using the -I flag.
+
+STRIP_FROM_INC_PATH    =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
+# less readable) file names. This can be useful if your file system doesn't
+# support long names like on DOS, Mac, or CD-ROM.
+# The default value is: NO.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the
+# first line (until the first dot) of a Javadoc-style comment as the brief
+# description. If set to NO, the Javadoc-style will behave just like regular Qt-
+# style comments (thus requiring an explicit @brief command for a brief
+# description.)
+# The default value is: NO.
+
+JAVADOC_AUTOBRIEF      = NO
+
+# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line
+# such as
+# /***************
+# as being the beginning of a Javadoc-style comment "banner". If set to NO, the
+# Javadoc-style will behave just like regular comments and it will not be
+# interpreted by doxygen.
+# The default value is: NO.
+
+JAVADOC_BANNER         = YES
+
+# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
+# line (until the first dot) of a Qt-style comment as the brief description. If
+# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
+# requiring an explicit \brief command for a brief description.)
+# The default value is: NO.
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a
+# multi-line C++ special comment block (i.e. a block of //! or /// comments) as
+# a brief description. This used to be the default behavior. The new default is
+# to treat a multi-line C++ comment block as a detailed description. Set this
+# tag to YES if you prefer the old behavior instead.
+#
+# Note that setting this tag to YES also means that rational rose comments are
+# not recognized any more.
+# The default value is: NO.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# By default Python docstrings are displayed as preformatted text and doxygen's
+# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the
+# doxygen's special commands can be used and the contents of the docstring
+# documentation blocks is shown as doxygen documentation.
+# The default value is: YES.
+
+PYTHON_DOCSTRING       = YES
+
+# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
+# documentation from any documented member that it re-implements.
+# The default value is: YES.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new
+# page for each member. If set to NO, the documentation of a member will be part
+# of the file/class/namespace that contains it.
+# The default value is: NO.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen
+# uses this value to replace tabs by spaces in code fragments.
+# Minimum value: 1, maximum value: 16, default value: 4.
+
+TAB_SIZE               = 2
+
+# This tag can be used to specify a number of aliases that act as commands in
+# the documentation. An alias has the form:
+# name=value
+# For example adding
+# "sideeffect=@par Side Effects:^^"
+# will allow you to put the command \sideeffect (or @sideeffect) in the
+# documentation, which will result in a user-defined paragraph with heading
+# "Side Effects:". Note that you cannot put \n's in the value part of an alias
+# to insert newlines (in the resulting output). You can put ^^ in the value part
+# of an alias to insert a newline as if a physical newline was in the original
+# file. When you need a literal { or } or , in the value part of an alias you
+# have to escape them by means of a backslash (\), this can lead to conflicts
+# with the commands \{ and \} for these it is advised to use the version @{ and
+# @} or use a double escape (\\{ and \\})
+
+ALIASES                =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
+# only. Doxygen will then generate output that is more tailored for C. For
+# instance, some of the names that are used will be different. The list of all
+# members will be omitted, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_FOR_C  = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
+# Python sources only. Doxygen will then generate output that is more tailored
+# for that language. For instance, namespaces will be presented as packages,
+# qualified scopes will look different, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources. Doxygen will then generate output that is tailored for Fortran.
+# The default value is: NO.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for VHDL.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice
+# sources only. Doxygen will then generate output that is more tailored for that
+# language. For instance, namespaces will be presented as modules, types will be
+# separated into more groups, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_SLICE  = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given
+# extension. Doxygen has a built-in mapping, but you can override or extend it
+# using this tag. The format is ext=language, where ext is a file extension, and
+# language is one of the parsers supported by doxygen: IDL, Java, JavaScript,
+# Csharp (C#), C, C++, Lex, D, PHP, md (Markdown), Objective-C, Python, Slice,
+# VHDL, Fortran (fixed format Fortran: FortranFixed, free formatted Fortran:
+# FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser
+# tries to guess whether the code is fixed or free formatted code, this is the
+# default for Fortran type files). For instance to make doxygen treat .inc files
+# as Fortran files (default is PHP), and .f files as C (default is Fortran),
+# use: inc=Fortran f=C.
+#
+# Note: For files without extension you can use no_extension as a placeholder.
+#
+# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
+# the files are not read by doxygen. When specifying no_extension you should add
+# * to the FILE_PATTERNS.
+#
+# Note see also the list of default file extension mappings.
+
+EXTENSION_MAPPING      =
+
+# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
+# according to the Markdown format, which allows for more readable
+# documentation. See https://daringfireball.net/projects/markdown/ for details.
+# The output of markdown processing is further processed by doxygen, so you can
+# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
+# case of backward compatibility issues.
+# The default value is: YES.
+
+MARKDOWN_SUPPORT       = YES
+
+# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up
+# to that level are automatically included in the table of contents, even if
+# they do not have an id attribute.
+# Note: This feature currently applies only to Markdown headings.
+# Minimum value: 0, maximum value: 99, default value: 5.
+# This tag requires that the tag MARKDOWN_SUPPORT is set to YES.
+
+TOC_INCLUDE_HEADINGS   = 5
+
+# When enabled doxygen tries to link words that correspond to documented
+# classes, or namespaces to their corresponding documentation. Such a link can
+# be prevented in individual cases by putting a % sign in front of the word or
+# globally by setting AUTOLINK_SUPPORT to NO.
+# The default value is: YES.
+
+AUTOLINK_SUPPORT       = YES
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should set this
+# tag to YES in order to let doxygen match function declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string);
+# versus func(std::string) {}). This also makes the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+# The default value is: NO.
+
+BUILTIN_STL_SUPPORT    = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+# The default value is: NO.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
+# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen
+# will parse them like normal C++ but will assume all classes use public instead
+# of private inheritance when no explicit protection keyword is present.
+# The default value is: NO.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate
+# getter and setter methods for a property. Setting this option to YES will make
+# doxygen replace the get and set methods by a property in the documentation.
+# This will only work if the methods are indeed getting or setting a simple
+# type. If this is not the case, or you want to show the methods anyway, you
+# should set this option to NO.
+# The default value is: YES.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+# The default value is: NO.
+
+DISTRIBUTE_GROUP_DOC   = YES
+
+# If one adds a struct or class to a group and this option is enabled, then also
+# any nested class or struct is added to the same group. By default this option
+# is disabled and one has to add nested compounds explicitly via \ingroup.
+# The default value is: NO.
+
+GROUP_NESTED_COMPOUNDS = NO
+
+# Set the SUBGROUPING tag to YES to allow class member groups of the same type
+# (for instance a group of public functions) to be put as a subgroup of that
+# type (e.g. under the Public Functions section). Set it to NO to prevent
+# subgrouping. Alternatively, this can be done per class using the
+# \nosubgrouping command.
+# The default value is: YES.
+
+SUBGROUPING            = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions
+# are shown inside the group in which they are included (e.g. using \ingroup)
+# instead of on a separate page (for HTML and Man pages) or section (for LaTeX
+# and RTF).
+#
+# Note that this feature does not work in combination with
+# SEPARATE_MEMBER_PAGES.
+# The default value is: NO.
+
+INLINE_GROUPED_CLASSES = NO
+
+# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions
+# with only public data fields or simple typedef fields will be shown inline in
+# the documentation of the scope in which they are defined (i.e. file,
+# namespace, or group documentation), provided this scope is documented. If set
+# to NO, structs, classes, and unions are shown on a separate page (for HTML and
+# Man pages) or section (for LaTeX and RTF).
+# The default value is: NO.
+
+INLINE_SIMPLE_STRUCTS  = NO
+
+# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
+# enum is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically be
+# useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+# The default value is: NO.
+
+TYPEDEF_HIDES_STRUCT   = NO
+
+# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
+# cache is used to resolve symbols given their name and scope. Since this can be
+# an expensive process and often the same symbol appears multiple times in the
+# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
+# doxygen will become slower. If the cache is too large, memory is wasted. The
+# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
+# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
+# symbols. At the end of a run doxygen will report the cache usage and suggest
+# the optimal cache size from a speed point of view.
+# Minimum value: 0, maximum value: 9, default value: 0.
+
+LOOKUP_CACHE_SIZE      = 0
+
+# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use
+# during processing. When set to 0 doxygen will base this on the number of
+# cores available in the system. You can set it explicitly to a value larger
+# than 0 to get more control over the balance between CPU load and processing
+# speed. At this moment only the input processing can be done using multiple
+# threads. Since this is still an experimental feature the default is set to 1,
+# which effectively disables parallel processing. Please report any issues you
+# encounter. Generating dot graphs in parallel is controlled by the
+# DOT_NUM_THREADS setting.
+# Minimum value: 0, maximum value: 32, default value: 1.
+
+NUM_PROC_THREADS       = 1
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in
+# documentation are documented, even if no documentation was available. Private
+# class members and static file members will be hidden unless the
+# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
+# Note: This will also disable the warnings about undocumented members that are
+# normally produced when WARNINGS is set to YES.
+# The default value is: NO.
+
+EXTRACT_ALL            = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will
+# be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIVATE        = NO
+
+# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual
+# methods of a class will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIV_VIRTUAL   = NO
+
+# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
+# scope will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PACKAGE        = YES
+
+# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be
+# included in the documentation.
+# The default value is: NO.
+
+EXTRACT_STATIC         = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
+# locally in source files will be included in the documentation. If set to NO,
+# only classes defined in header files are included. Does not have any effect
+# for Java sources.
+# The default value is: YES.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. If set to YES, local methods,
+# which are defined in the implementation section but not in the interface are
+# included in the documentation. If set to NO, only methods in the interface are
+# included.
+# The default value is: NO.
+
+EXTRACT_LOCAL_METHODS  = YES
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base name of
+# the file that contains the anonymous namespace. By default anonymous namespaces
+# are hidden.
+# The default value is: NO.
+
+EXTRACT_ANON_NSPACES   = YES
+
+# If this flag is set to YES, the name of an unnamed parameter in a declaration
+# will be determined by the corresponding definition. By default unnamed
+# parameters remain unnamed in the output.
+# The default value is: YES.
+
+RESOLVE_UNNAMED_PARAMS = YES
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
+# undocumented members inside documented classes or files. If set to NO these
+# members will be included in the various overviews, but no documentation
+# section is generated. This option has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy. If set
+# to NO, these classes will be included in the various overviews. This option
+# will also hide undocumented C++ concepts if enabled. This option has no effect
+# if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
+# declarations. If set to NO, these declarations will be included in the
+# documentation.
+# The default value is: NO.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
+# documentation blocks found inside the body of a function. If set to NO, these
+# blocks will be appended to the function's detailed documentation block.
+# The default value is: NO.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation that is typed after a
+# \internal command is included. If the tag is set to NO then the documentation
+# will be excluded. Set it to YES to include the internal documentation.
+# The default value is: NO.
+
+INTERNAL_DOCS          = NO
+
+# With the correct setting of option CASE_SENSE_NAMES doxygen will better be
+# able to match the capabilities of the underlying filesystem. In case the
+# filesystem is case sensitive (i.e. it supports files in the same directory
+# whose names only differ in casing), the option must be set to YES to properly
+# deal with such files in case they appear in the input. For filesystems that
+# are not case sensitive the option should be set to NO to properly deal with
+# output files written for symbols that only differ in casing, such as for two
+# classes, one named CLASS and the other named Class, and to also support
+# references to files without having to specify the exact matching casing. On
+# Windows (including Cygwin) and MacOS, users should typically set this option
+# to NO, whereas on Linux or other Unix flavors it should typically be set to
+# YES.
+# Possible values are: SYSTEM, NO and YES.
+# The default value is: SYSTEM.
+
+CASE_SENSE_NAMES       = SYSTEM
+
+# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
+# their full class and namespace scopes in the documentation. If set to YES, the
+# scope will be hidden.
+# The default value is: NO.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will
+# append additional text to a page's title, such as Class Reference. If set to
+# YES the compound reference will be hidden.
+# The default value is: NO.
+
+HIDE_COMPOUND_REFERENCE= NO
+
+# If the SHOW_HEADERFILE tag is set to YES then the documentation for a class
+# will show which file needs to be included to use the class.
+# The default value is: YES.
+
+SHOW_HEADERFILE        = YES
+
+# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
+# the files that are included by a file in the documentation of that file.
+# The default value is: YES.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
+# grouped member an include statement to the documentation, telling the reader
+# which file to include in order to use the member.
+# The default value is: NO.
+
+SHOW_GROUPED_MEMB_INC  = NO
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
+# files with double quotes in the documentation rather than with sharp brackets.
+# The default value is: NO.
+
+FORCE_LOCAL_INCLUDES   = NO
+
+# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
+# documentation for inline members.
+# The default value is: YES.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
+# (detailed) documentation of file and class members alphabetically by member
+# name. If set to NO, the members will appear in declaration order.
+# The default value is: YES.
+
+SORT_MEMBER_DOCS       = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
+# descriptions of file, namespace and class members alphabetically by member
+# name. If set to NO, the members will appear in declaration order. Note that
+# this will also influence the order of the classes in the class list.
+# The default value is: NO.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
+# (brief and detailed) documentation of class members so that constructors and
+# destructors are listed first. If set to NO the constructors will appear in the
+# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
+# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
+# member documentation.
+# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
+# detailed member documentation.
+# The default value is: NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
+# of group names into alphabetical order. If set to NO the group names will
+# appear in their defined order.
+# The default value is: NO.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
+# fully-qualified names, including namespaces. If set to NO, the class list will
+# be sorted only by class name, not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the alphabetical
+# list.
+# The default value is: NO.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
+# type resolution of all parameters of a function it will reject a match between
+# the prototype and the implementation of a member function even if there is
+# only one candidate or it is obvious which candidate to choose by doing a
+# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
+# accept a match between prototype and implementation in such cases.
+# The default value is: NO.
+
+STRICT_PROTO_MATCHING  = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo
+# list. This list is created by putting \todo commands in the documentation.
+# The default value is: YES.
+
+GENERATE_TODOLIST      = NO
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
+# list. This list is created by putting \test commands in the documentation.
+# The default value is: YES.
+
+GENERATE_TESTLIST      = NO
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
+# list. This list is created by putting \bug commands in the documentation.
+# The default value is: YES.
+
+GENERATE_BUGLIST       = NO
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
+# the deprecated list. This list is created by putting \deprecated commands in
+# the documentation.
+# The default value is: YES.
+
+GENERATE_DEPRECATEDLIST= NO
+
+# The ENABLED_SECTIONS tag can be used to enable conditional documentation
+# sections, marked by \if <section_label> ... \endif and \cond <section_label>
+# ... \endcond blocks.
+
+ENABLED_SECTIONS       =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
+# initial value of a variable or macro / define can have for it to appear in the
+# documentation. If the initializer consists of more lines than specified here
+# it will be hidden. Use a value of 0 to hide initializers completely. The
+# appearance of the value of individual variables and macros / defines can be
+# controlled using \showinitializer or \hideinitializer command in the
+# documentation regardless of this setting.
+# Minimum value: 0, maximum value: 10000, default value: 30.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
+# the bottom of the documentation of classes and structs. If set to YES, the
+# list will mention the files that were used to generate the documentation.
+# The default value is: YES.
+
+SHOW_USED_FILES        = YES
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
+# will remove the Files entry from the Quick Index and from the Folder Tree View
+# (if specified).
+# The default value is: YES.
+
+SHOW_FILES             = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
+# page. This will remove the Namespaces entry from the Quick Index and from the
+# Folder Tree View (if specified).
+# The default value is: YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command command input-file, where command is the value of the
+# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided
+# by doxygen. Whatever the program writes to standard output is used as the file
+# version. For an example see the documentation.
+
+FILE_VERSION_FILTER    =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option. You can
+# optionally specify a file name after the option, if omitted DoxygenLayout.xml
+# will be used as the name of the layout file. See also section "Changing the
+# layout of pages" for information.
+#
+# Note that if you run doxygen from a directory containing a file called
+# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
+# tag is left empty.
+
+LAYOUT_FILE            =
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
+# the reference definitions. This must be a list of .bib files. The .bib
+# extension is automatically appended if omitted. This requires the bibtex tool
+# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info.
+# For LaTeX the style of the bibliography can be controlled using
+# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
+# search path. See also \cite for info on how to create references.
+
+CITE_BIB_FILES         =
+
+#---------------------------------------------------------------------------
+# Configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated to
+# standard output by doxygen. If QUIET is set to YES this implies that the
+# messages are off.
+# The default value is: NO.
+
+QUIET                  = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
+# this implies that the warnings are on.
+#
+# Tip: Turn warnings on while writing the documentation.
+# The default value is: YES.
+
+WARNINGS               = YES
+
+# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate
+# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
+# will automatically be disabled.
+# The default value is: YES.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as documenting some parameters in
+# a documented function twice, or documenting parameters that don't exist or
+# using markup commands wrongly.
+# The default value is: YES.
+
+WARN_IF_DOC_ERROR      = YES
+
+# If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete
+# function parameter documentation. If set to NO, doxygen will accept that some
+# parameters have no documentation without warning.
+# The default value is: YES.
+
+WARN_IF_INCOMPLETE_DOC = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
+# are documented, but have no documentation for their parameters or return
+# value. If set to NO, doxygen will only warn about wrong parameter
+# documentation, but not about the absence of documentation. If EXTRACT_ALL is
+# set to YES then this flag will automatically be disabled. See also
+# WARN_IF_INCOMPLETE_DOC
+# The default value is: NO.
+
+WARN_NO_PARAMDOC       = YES
+
+# If WARN_IF_UNDOC_ENUM_VAL option is set to YES, doxygen will warn about
+# undocumented enumeration values. If set to NO, doxygen will accept
+# undocumented enumeration values. If EXTRACT_ALL is set to YES then this flag
+# will automatically be disabled.
+# The default value is: NO.
+
+WARN_IF_UNDOC_ENUM_VAL = NO
+
+# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
+# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS
+# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but
+# at the end of the doxygen process doxygen will return with a non-zero status.
+# Possible values are: NO, YES and FAIL_ON_WARNINGS.
+# The default value is: NO.
+
+WARN_AS_ERROR          = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that doxygen
+# can produce. The string should contain the $file, $line, and $text tags, which
+# will be replaced by the file and line number from which the warning originated
+# and the warning text. Optionally the format may contain $version, which will
+# be replaced by the version of the file (if it could be obtained via
+# FILE_VERSION_FILTER)
+# See also: WARN_LINE_FORMAT
+# The default value is: $file:$line: $text.
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# In the $text part of the WARN_FORMAT command it is possible that a reference
+# to a more specific place is given. To make it easier to jump to this place
+# (outside of doxygen) the user can define a custom "cut" / "paste" string.
+# Example:
+# WARN_LINE_FORMAT = "'vi $file +$line'"
+# See also: WARN_FORMAT
+# The default value is: at line $line of file $file.
+
+WARN_LINE_FORMAT       = "at line $line of file $file"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning and error
+# messages should be written. If left blank the output is written to standard
+# error (stderr). In case the file specified cannot be opened for writing the
+# warning and error messages are written to standard error. When - is specified
+# as the file name, the warning and error messages are written to standard
+# output (stdout).
+
+WARN_LOGFILE           =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag is used to specify the files and/or directories that contain
+# documented source files. You may enter file names like myfile.cpp or
+# directories like /usr/src/myproject. Separate the files or directories with
+# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
+# Note: If this tag is empty the current directory is searched.
+
+INPUT                  = cpp/roaring.hh cpp/roaring64map.hh
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
+# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
+# documentation (see:
+# https://www.gnu.org/software/libiconv/) for the list of possible encodings.
+# See also: INPUT_FILE_ENCODING
+# The default value is: UTF-8.
+
+INPUT_ENCODING         = UTF-8
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. The INPUT_FILE_ENCODING tag can be used to specify the
+# character encoding on a per file pattern basis. Doxygen will compare the file
+# name with each pattern and apply the encoding (instead of the default
+# INPUT_ENCODING) if there is a match. The character encodings are a list of the
+# form: pattern=encoding (like *.php=ISO-8859-1). See INPUT_ENCODING for further
+# information on supported encodings.
+
+INPUT_FILE_ENCODING    =
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
+# *.h) to filter out the source-files in the directories.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# read by doxygen.
+#
+# Note the list of default checked file patterns might differ from the list of
+# default file extension mappings.
+#
+# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,
+# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
+# *.hh, *.hxx, *.hpp, *.h++, *.l, *.cs, *.d, *.php, *.php4, *.php5, *.phtml,
+# *.inc, *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C
+# comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd,
+# *.vhdl, *.ucf, *.qsf and *.ice.
+
+FILE_PATTERNS          = *.c \
+                         *.cc \
+                         *.cxx \
+                         *.cpp \
+                         *.c++ \
+                         *.java \
+                         *.ii \
+                         *.ixx \
+                         *.ipp \
+                         *.i++ \
+                         *.inl \
+                         *.idl \
+                         *.ddl \
+                         *.odl \
+                         *.h \
+                         *.hh \
+                         *.hxx \
+                         *.hpp \
+                         *.h++ \
+                         *.l \
+                         *.cs \
+                         *.d \
+                         *.php \
+                         *.php4 \
+                         *.php5 \
+                         *.phtml \
+                         *.inc \
+                         *.m \
+                         *.markdown \
+                         *.md \
+                         *.mm \
+                         *.dox \
+                         *.py \
+                         *.pyw \
+                         *.f90 \
+                         *.f95 \
+                         *.f03 \
+                         *.f08 \
+                         *.f18 \
+                         *.f \
+                         *.for \
+                         *.vhd \
+                         *.vhdl \
+                         *.ucf \
+                         *.qsf \
+                         *.ice
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories should
+# be searched for input files as well.
+# The default value is: NO.
+
+RECURSIVE              = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+#
+# Note that relative paths are relative to the directory from which doxygen is
+# run.
+
+EXCLUDE                = benchmarks, tests, Testing, tools, build, docs
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+# The default value is: NO.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories.
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       = "*/test/*"
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# ANamespace::AClass, ANamespace::*Test
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories use the pattern */test/*
+
+EXCLUDE_SYMBOLS        =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or directories
+# that contain example code fragments that are included (see the \include
+# command).
+
+EXAMPLE_PATH           =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
+# *.h) to filter out the source-files in the directories. If left blank all
+# files are included.
+
+EXAMPLE_PATTERNS       = *
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude commands
+# irrespective of the value of the RECURSIVE tag.
+# The default value is: NO.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or directories
+# that contain images that are to be included in the documentation (see the
+# \image command).
+
+IMAGE_PATH             =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command:
+#
+# <filter> <input-file>
+#
+# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
+# name of an input file. Doxygen will then use the output that the filter
+# program writes to standard output. If FILTER_PATTERNS is specified, this tag
+# will be ignored.
+#
+# Note that the filter must not add or remove lines; it is applied before the
+# code is scanned, but not when the output code is generated. If lines are added
+# or removed, the anchors will not be placed correctly.
+#
+# Note that doxygen will use the data processed and written to standard output
+# for further processing, therefore nothing else, like debug statements or used
+# commands (so in case of a Windows batch file always use @echo OFF), should be
+# written to standard output.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+INPUT_FILTER           =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form: pattern=filter
+# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
+# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
+# patterns match the file name, INPUT_FILTER is applied.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+FILTER_PATTERNS        =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will also be used to filter the input files that are used for
+# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
+# The default value is: NO.
+
+FILTER_SOURCE_FILES    = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and
+# it is also possible to disable source filtering for a specific pattern using
+# *.ext= (so without naming a filter).
+# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.
+
+FILTER_SOURCE_PATTERNS =
+
+# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
+# is part of the input, its contents will be placed on the main page
+# (index.html). This can be useful if you have a project on for instance GitHub
+# and want to reuse the introduction page also for the doxygen output.
+
+USE_MDFILE_AS_MAINPAGE = README.md
+
+# The Fortran standard specifies that for fixed formatted Fortran code all
+# characters from position 72 are to be considered as comment. A common
+# extension is to allow longer lines before the automatic comment starts. The
+# setting FORTRAN_COMMENT_AFTER will also make it possible that longer lines can
+# be processed before the automatic comment starts.
+# Minimum value: 7, maximum value: 10000, default value: 72.
+
+FORTRAN_COMMENT_AFTER  = 72
+
+#---------------------------------------------------------------------------
+# Configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will be
+# generated. Documented entities will be cross-referenced with these sources.
+#
+# Note: To get rid of all source code in the generated output, make sure that
+# also VERBATIM_HEADERS is set to NO.
+# The default value is: NO.
+
+SOURCE_BROWSER         = YES
+
+# Setting the INLINE_SOURCES tag to YES will include the body of functions,
+# classes and enums directly into the documentation.
+# The default value is: NO.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
+# special comment blocks from generated source code fragments. Normal C, C++ and
+# Fortran comments will always remain visible.
+# The default value is: YES.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES then for each documented
+# entity all documented functions referencing it will be listed.
+# The default value is: NO.
+
+REFERENCED_BY_RELATION = YES
+
+# If the REFERENCES_RELATION tag is set to YES then for each documented function
+# all documented entities called/used by that function will be listed.
+# The default value is: NO.
+
+REFERENCES_RELATION    = YES
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
+# to YES then the hyperlinks from functions in REFERENCES_RELATION and
+# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will
+# link to the documentation.
+# The default value is: YES.
+
+REFERENCES_LINK_SOURCE = NO
+
+# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the
+# source code will show a tooltip with additional information such as prototype,
+# brief description and links to the definition and documentation. Since this
+# will make the HTML file larger and loading of large files a bit slower, you
+# can opt to disable this feature.
+# The default value is: YES.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+SOURCE_TOOLTIPS        = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code will
+# point to the HTML generated by the htags(1) tool instead of doxygen built-in
+# source browser. The htags tool is part of GNU's global source tagging system
+# (see https://www.gnu.org/software/global/global.html). You will need version
+# 4.8.6 or higher.
+#
+# To use it do the following:
+# - Install the latest version of global
+# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file
+# - Make sure the INPUT points to the root of the source tree
+# - Run doxygen as normal
+#
+# Doxygen will invoke htags (and that will in turn invoke gtags), so these
+# tools must be available from the command line (i.e. in the search path).
+#
+# The result: instead of the source browser generated by doxygen, the links to
+# source code will now point to the output of htags.
+# The default value is: NO.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a
+# verbatim copy of the header file for each class for which an include is
+# specified. Set to NO to disable this.
+# See also: Section \class.
+# The default value is: YES.
+
+VERBATIM_HEADERS       = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all
+# compounds will be generated. Enable this if the project contains a lot of
+# classes, structs, unions or interfaces.
+# The default value is: YES.
+
+ALPHABETICAL_INDEX     = YES
+
+# The IGNORE_PREFIX tag can be used to specify a prefix (or a list of prefixes)
+# that should be ignored while generating the index headers. The IGNORE_PREFIX
+# tag works for classes, function and member names. The entity will be placed in
+# the alphabetical list under the first letter of the entity name that remains
+# after removing the prefix.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+IGNORE_PREFIX          =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output.
+# The default value is: YES.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each
+# generated HTML page (for example: .htm, .php, .asp).
+# The default value is: .html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a user-defined HTML header file for
+# each generated HTML page. If the tag is left blank doxygen will generate a
+# standard header.
+#
+# To get valid HTML, the header file must include any scripts and style sheets
+# that doxygen needs, which depend on the configuration options used (e.g. the
+# setting GENERATE_TREEVIEW). It is highly recommended to start with a default
+# header using
+# doxygen -w html new_header.html new_footer.html new_stylesheet.css
+# YourConfigFile
+# and then modify the file new_header.html. See also section "Doxygen usage"
+# for information on how to generate the default header that doxygen normally
+# uses.
+# Note: The header is subject to change so you typically have to regenerate the
+# default header when upgrading to a newer version of doxygen. For a description
+# of the possible markers and block names see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_HEADER            =
+
+# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
+# generated HTML page. If the tag is left blank doxygen will generate a standard
+# footer. See HTML_HEADER for more information on how to generate a default
+# footer and what special commands can be used inside the footer. See also
+# section "Doxygen usage" for information on how to generate the default footer
+# that doxygen normally uses.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FOOTER            =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
+# sheet that is used by each HTML page. It can be used to fine-tune the look of
+# the HTML output. If left blank doxygen will generate a default style sheet.
+# See also section "Doxygen usage" for information on how to generate the style
+# sheet that doxygen normally uses.
+# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as
+# it is more robust and this tag (HTML_STYLESHEET) will in the future become
+# obsolete.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_STYLESHEET        =
+
+# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined
+# cascading style sheets that are included after the standard style sheets
+# created by doxygen. Using this option one can overrule certain style aspects.
+# This is preferred over using HTML_STYLESHEET since it does not replace the
+# standard style sheet and is therefore more robust against future updates.
+# Doxygen will copy the style sheet files to the output directory.
+# Note: The order of the extra style sheet files is of importance (e.g. the last
+# style sheet in the list overrules the setting of the previous ones in the
+# list).
+# Note: Since the styling of scrollbars can currently not be overruled in
+# Webkit/Chromium, the styling will be left out of the default doxygen.css if
+# one or more extra stylesheets have been specified. So if scrollbar
+# customization is desired it has to be added explicitly. For an example see the
+# documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_STYLESHEET  = docs/theme/doxygen-awesome.css \
+                         docs/theme/doxygen-awesome-sidebar-only.css
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that the
+# files will be copied as-is; there are no commands or markers available.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_FILES       = docs/theme/doxygen-awesome-darkmode-toggle.js
+
+# The HTML_COLORSTYLE tag can be used to specify if the generated HTML output
+# should be rendered with a dark or light theme.
+# Possible values are: LIGHT always generate light mode output, DARK always
+# generate dark mode output, AUTO_LIGHT automatically set the mode according to
+# the user preference, use light mode if no preference is set (the default),
+# AUTO_DARK automatically set the mode according to the user preference, use
+# dark mode if no preference is set and TOGGLE allows the user to switch between
+# light and dark mode via a button.
+# The default value is: AUTO_LIGHT.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE        = LIGHT
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
+# will adjust the colors in the style sheet and background images according to
+# this color. Hue is specified as an angle on a color-wheel, see
+# https://en.wikipedia.org/wiki/Hue for more information. For instance the value
+# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
+# purple, and 360 is red again.
+# Minimum value: 0, maximum value: 359, default value: 220.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_HUE    = 209
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
+# in the HTML output. For a value of 0 the output will use gray-scales only. A
+# value of 255 will produce the most vivid colors.
+# Minimum value: 0, maximum value: 255, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_SAT    = 255
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the
+# luminance component of the colors in the HTML output. Values below 100
+# gradually make the output lighter, whereas values above 100 make the output
+# darker. The value divided by 100 is the actual gamma applied, so 80 represents
+# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not
+# change the gamma.
+# Minimum value: 40, maximum value: 240, default value: 80.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_GAMMA  = 113
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting this
+# to YES can help to show when doxygen was last run and thus if the
+# documentation is up to date.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_TIMESTAMP         = NO
+
+# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML
+# documentation will contain a main index with vertical navigation menus that
+# are dynamically created via JavaScript. If disabled, the navigation index will
+# consist of multiple levels of tabs that are statically embedded in every HTML
+# page. Disable this option to support browsers that do not have JavaScript,
+# like the Qt help browser.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_MENUS     = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
+# shown in the various tree structured indices initially; the user can expand
+# and collapse entries dynamically later on. Doxygen will expand the tree to
+# such a level that at most the specified number of entries are visible (unless
+# a fully collapsed tree already exceeds this amount). So setting the number of
+# entries to 1 will produce a fully collapsed tree by default. 0 is a special value
+# representing an infinite number of entries and will result in a fully expanded
+# tree by default.
+# Minimum value: 0, maximum value: 9999, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_INDEX_NUM_ENTRIES = 100
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files will be
+# generated that can be used as input for Apple's Xcode 3 integrated development
+# environment (see:
+# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To
+# create a documentation set, doxygen will generate a Makefile in the HTML
+# output directory. Running make will produce the docset in that directory and
+# running make install will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
+# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy
+# genXcode/_index.html for more information.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_DOCSET        = NO
+
+# This tag determines the name of the docset feed. A documentation feed provides
+# an umbrella under which multiple documentation sets from a single provider
+# (such as a company or product suite) can be grouped.
+# The default value is: Doxygen generated docs.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# This tag determines the URL of the docset feed. A documentation feed provides
+# an umbrella under which multiple documentation sets from a single provider
+# (such as a company or product suite) can be grouped.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_FEEDURL         =
+
+# This tag specifies a string that should uniquely identify the documentation
+# set bundle. This should be a reverse domain-name style string, e.g.
+# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
+# the documentation publisher. This should be a reverse domain-name style
+# string, e.g. com.mycompany.MyDocSet.documentation.
+# The default value is: org.doxygen.Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+# The default value is: Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_NAME  = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
+# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
+# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
+# on Windows. In the beginning of 2021 Microsoft took the original page, with
+# among others the download links, offline (the HTML help workshop was already
+# many years in maintenance mode). You can download the HTML help workshop
+# installation executable from the web archives (see:
+# http://web.archive.org/web/20160201063255/http://download.microsoft.com/downlo
+# ad/0/A/9/0A939EF6-E31C-430F-A3DF-DFAE7960D564/htmlhelp.exe).
+#
+# The HTML Help Workshop contains a compiler that can convert all HTML output
+# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
+# files are now used as the Windows 98 help format, and will replace the old
+# Windows help format (.hlp) on all Windows platforms in the future. Compressed
+# HTML files also contain an index, a table of contents, and you can search for
+# words in the documentation. The HTML workshop also contains a viewer for
+# compressed HTML files.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_HTMLHELP      = NO
+
+# The CHM_FILE tag can be used to specify the file name of the resulting .chm
+# file. You can add a path in front of the file if the result should not be
+# written to the html output directory.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_FILE               =
+
+# The HHC_LOCATION tag can be used to specify the location (absolute path
+# including file name) of the HTML help compiler (hhc.exe). If non-empty,
+# doxygen will try to run the HTML help compiler on the generated index.hhp.
+# The file has to be specified with full path.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+HHC_LOCATION           =
+
+# The GENERATE_CHI flag controls if a separate .chi index file is generated
+# (YES) or that it should be included in the main .chm file (NO).
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+GENERATE_CHI           = NO
+
+# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)
+# and project file content.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_INDEX_ENCODING     =
+
+# The BINARY_TOC flag controls whether a binary table of contents is generated
+# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it
+# enables the Previous and Next buttons.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members to
+# the table of contents of the HTML help documentation and to the tree view.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+TOC_EXPAND             = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
+# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
+# (.qch) of the generated HTML documentation.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify
+# the file name of the resulting .qch file. The path specified is relative to
+# the HTML output folder.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QCH_FILE               =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
+# Project output. For more information please see Qt Help Project / Namespace
+# (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace).
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_NAMESPACE          = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
+# Help Project output. For more information please see Qt Help Project / Virtual
+# Folders (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders).
+# The default value is: doc.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
+# filter to add. For more information please see Qt Help Project / Custom
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_NAME   =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see Qt Help Project / Custom
+# Filters (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_ATTRS  =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's filter section matches. Qt Help Project / Filter Attributes (see:
+# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_SECT_FILTER_ATTRS  =
+
+# The QHG_LOCATION tag can be used to specify the location (absolute path
+# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to
+# run qhelpgenerator on the generated .qhp file.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHG_LOCATION           =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be
+# generated; together with the HTML files, they form an Eclipse help plugin. To
+# install this plugin and make it available under the help contents menu in
+# Eclipse, the contents of the directory containing the HTML and XML files needs
+# to be copied into the plugins directory of eclipse. The name of the directory
+# within the plugins directory should be the same as the ECLIPSE_DOC_ID value.
+# After copying Eclipse needs to be restarted before the help appears.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_ECLIPSEHELP   = NO
+
+# A unique identifier for the Eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have this
+# name. Each documentation set should have its own identifier.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.
+
+ECLIPSE_DOC_ID         = org.doxygen.Project
+
+# If you want full control over the layout of the generated HTML pages it might
+# be necessary to disable the index and replace it with your own. The
+# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top
+# of each HTML page. A value of NO enables the index and the value YES disables
+# it. Since the tabs in the index contain the same information as the navigation
+# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+DISABLE_INDEX          = NO
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information. If the tag
+# value is set to YES, a side panel will be generated containing a tree-like
+# index structure (just like the one that is generated for HTML Help). For this
+# to work a browser that supports JavaScript, DHTML, CSS and frames is required
+# (i.e. any modern browser). Windows users are probably better off using the
+# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can
+# further fine tune the look of the index (see "Fine-tuning the output"). As an
+# example, the default style sheet generated by doxygen has an example that
+# shows how to put an image at the root of the tree instead of the PROJECT_NAME.
+# Since the tree basically has the same information as the tab index, you could
+# consider setting DISABLE_INDEX to YES when enabling this option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_TREEVIEW      = YES
+
+# When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the
+# FULL_SIDEBAR option determines if the side bar is limited to only the treeview
+# area (value NO) or if it should extend to the full height of the window (value
+# YES). Setting this to YES gives a layout similar to
+# https://docs.readthedocs.io with more room for contents, but less room for the
+# project logo, title, and description. If either GENERATE_TREEVIEW or
+# DISABLE_INDEX is set to NO, this option has no effect.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FULL_SIDEBAR           = NO
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
+# doxygen will group on one line in the generated HTML documentation.
+#
+# Note that a value of 0 will completely suppress the enum values from appearing
+# in the overview section.
+# Minimum value: 0, maximum value: 20, default value: 4.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
+# to set the initial width (in pixels) of the frame in which the tree is shown.
+# Minimum value: 0, maximum value: 1500, default value: 250.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+TREEVIEW_WIDTH         = 250
+
+# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to
+# external symbols imported via tag files in a separate window.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+EXT_LINKS_IN_WINDOW    = NO
+
+# If the OBFUSCATE_EMAILS tag is set to YES, doxygen will obfuscate email
+# addresses.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+OBFUSCATE_EMAILS       = YES
+
+# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg
+# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see
+# https://inkscape.org) to generate formulas as SVG images instead of PNGs for
+# the HTML output. These images will generally look nicer at scaled resolutions.
+# Possible values are: png (the default) and svg (looks nicer but requires the
+# pdf2svg or inkscape tool).
+# The default value is: png.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FORMULA_FORMAT    = png
+
+# Use this tag to change the font size of LaTeX formulas included as images in
+# the HTML documentation. When you change the font size after a successful
+# doxygen run you need to manually remove any form_*.png images from the HTML
+# output directory to force them to be regenerated.
+# Minimum value: 8, maximum value: 50, default value: 10.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_FONTSIZE       = 10
+
+# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands
+# to create new LaTeX commands to be used in formulas as building blocks. See
+# the section "Including formulas" for details.
+
+FORMULA_MACROFILE      =
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
+# https://www.mathjax.org) which uses client side JavaScript for the rendering
+# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
+# installed or if you want formulas to look prettier in the HTML output. When
+# enabled you may also need to install MathJax separately and configure the path
+# to it using the MATHJAX_RELPATH option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+USE_MATHJAX            = NO
+
+# With MATHJAX_VERSION it is possible to specify the MathJax version to be used.
+# Note that the different versions of MathJax have different requirements with
+# regards to the different settings, so it is possible that also other MathJax
+# settings have to be changed when switching between the different MathJax
+# versions.
+# Possible values are: MathJax_2 and MathJax_3.
+# The default value is: MathJax_2.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_VERSION        = MathJax_2
+
+# When MathJax is enabled you can set the default output format to be used for
+# the MathJax output. For more details about the output format see MathJax
+# version 2 (see:
+# http://docs.mathjax.org/en/v2.7-latest/output.html) and MathJax version 3
+# (see:
+# http://docs.mathjax.org/en/latest/web/components/output.html).
+# Possible values are: HTML-CSS (which is slower, but has the best
+# compatibility. This is the name for MathJax version 2, for MathJax version 3
+# this will be translated into chtml), NativeMML (i.e. MathML. Only supported
+# for MathJax 2. For MathJax version 3 chtml will be used instead.), chtml (This
+# is the name for MathJax version 3, for MathJax version 2 this will be
+# translated into HTML-CSS) and SVG.
+# The default value is: HTML-CSS.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_FORMAT         = HTML-CSS
+
+# When MathJax is enabled you need to specify the location relative to the HTML
+# output directory using the MATHJAX_RELPATH option. The destination directory
+# should contain the MathJax.js script. For instance, if the mathjax directory
+# is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
+# Content Delivery Network so you can quickly see the result without installing
+# MathJax. However, it is strongly recommended to install a local copy of
+# MathJax from https://www.mathjax.org before deployment. The default value is:
+# - in case of MathJax version 2: https://cdn.jsdelivr.net/npm/mathjax@2
+# - in case of MathJax version 3: https://cdn.jsdelivr.net/npm/mathjax@3
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_RELPATH        =
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
+# extension names that should be enabled during MathJax rendering. For example
+# for MathJax version 2 (see
+# https://docs.mathjax.org/en/v2.7-latest/tex.html#tex-and-latex-extensions):
+# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
+# For example for MathJax version 3 (see
+# http://docs.mathjax.org/en/latest/input/tex/extensions/index.html):
+# MATHJAX_EXTENSIONS = ams
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_EXTENSIONS     =
+
+# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
+# of code that will be used on startup of the MathJax code. See the MathJax site
+# (see:
+# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an
+# example see the documentation.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_CODEFILE       =
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
+# the HTML output. The underlying search engine uses javascript and DHTML and
+# should work on any modern browser. Note that when using HTML help
+# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
+# there is already a search function so this one should typically be disabled.
+# For large projects the javascript based search engine can be slow, then
+# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
+# search using the keyboard; to jump to the search box use <access key> + S
+# (what the <access key> is depends on the OS and browser, but it is typically
+# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down
+# key> to jump into the search results window, the results can be navigated
+# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel
+# the search. The filter options can be selected when the cursor is inside the
+# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys>
+# to select a filter and <Enter> or <escape> to activate or cancel the filter
+# option.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+SEARCHENGINE           = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a web server instead of a web client using JavaScript. There
+# are two flavors of web server based searching depending on the EXTERNAL_SEARCH
+# setting. When disabled, doxygen will generate a PHP script for searching and
+# an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing
+# and searching needs to be provided by external tools. See the section
+# "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SERVER_BASED_SEARCH    = NO
+
+# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP
+# script for searching. Instead the search results are written to an XML file
+# which needs to be processed by an external indexer. Doxygen will invoke an
+# external search engine pointed to by the SEARCHENGINE_URL option to obtain the
+# search results.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see:
+# https://xapian.org/).
+#
+# See the section "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH        = NO
+
+# The SEARCHENGINE_URL should point to a search engine hosted by a web server
+# which will return the search results when EXTERNAL_SEARCH is enabled.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see:
+# https://xapian.org/). See the section "External Indexing and Searching" for
+# details.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHENGINE_URL       =
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
+# search data is written to a file for indexing by an external tool. With the
+# SEARCHDATA_FILE tag the name of this file can be specified.
+# The default file is: searchdata.xml.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHDATA_FILE        = searchdata.xml
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the
+# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
+# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
+# projects and redirect the results back to the right project.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH_ID     =
+
+# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
+# projects other than the one defined by this configuration file, but that are
+# all added to the same external search index. Each project needs to have a
+# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps each id
+# to a relative location where the documentation can be found. The format is:
+# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ...
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTRA_SEARCH_MAPPINGS  =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES, doxygen will generate LaTeX output.
+# The default value is: YES.
+
+GENERATE_LATEX         = YES
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked.
+#
+# Note that when not enabling USE_PDFLATEX the default is latex; when enabling
+# USE_PDFLATEX the default is pdflatex, and when in the latter case latex is
+# chosen this is overwritten by pdflatex. For specific output languages the
+# default may have been set differently; this depends on the implementation of
+# the output language.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_CMD_NAME         =
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
+# index for LaTeX.
+# Note: This tag is used in the Makefile / make.bat.
+# See also: LATEX_MAKEINDEX_CMD for the part in the generated output file
+# (.tex).
+# The default file is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# The LATEX_MAKEINDEX_CMD tag can be used to specify the command name to
+# generate index for LaTeX. In case there is no backslash (\) as first character
+# it will be automatically added in the LaTeX code.
+# Note: This tag is used in the generated output file (.tex).
+# See also: MAKEINDEX_CMD_NAME for the part in the Makefile / make.bat.
+# The default value is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_MAKEINDEX_CMD    = makeindex
+
+# If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used by the
+# printer.
+# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x
+# 14 inches) and executive (7.25 x 10.5 inches).
+# The default value is: a4.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PAPER_TYPE             = a4
+
+# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
+# that should be included in the LaTeX output. The package can be specified just
+# by its name or with the correct syntax as to be used with the LaTeX
+# \usepackage command. To get the times font for instance you can specify:
+# EXTRA_PACKAGES=times or EXTRA_PACKAGES={times}
+# To use the option intlimits with the amsmath package you can specify:
+# EXTRA_PACKAGES=[intlimits]{amsmath}
+# If left blank no extra packages will be included.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+EXTRA_PACKAGES         =
+
+# The LATEX_HEADER tag can be used to specify a user-defined LaTeX header for
+# the generated LaTeX document. The header should contain everything until the
+# first chapter. If it is left blank doxygen will generate a standard header. It
+# is highly recommended to start with a default header using
+# doxygen -w latex new_header.tex new_footer.tex new_stylesheet.sty
+# and then modify the file new_header.tex. See also section "Doxygen usage" for
+# information on how to generate the default header that doxygen normally uses.
+#
+# Note: Only use a user-defined header if you know what you are doing!
+# Note: The header is subject to change so you typically have to regenerate the
+# default header when upgrading to a newer version of doxygen. The following
+# commands have a special meaning inside the header (and footer): For a
+# description of the possible markers and block names see the documentation.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HEADER           =
+
+# The LATEX_FOOTER tag can be used to specify a user-defined LaTeX footer for
+# the generated LaTeX document. The footer should contain everything after the
+# last chapter. If it is left blank doxygen will generate a standard footer. See
+# LATEX_HEADER for more information on how to generate a default footer and what
+# special commands can be used inside the footer. See also section "Doxygen
+# usage" for information on how to generate the default footer that doxygen
+# normally uses. Note: Only use a user-defined footer if you know what you are
+# doing!
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_FOOTER           =
+
+# The LATEX_EXTRA_STYLESHEET tag can be used to specify additional user-defined
+# LaTeX style sheets that are included after the standard style sheets created
+# by doxygen. Using this option one can overrule certain style aspects. Doxygen
+# will copy the style sheet files to the output directory.
+# Note: The order of the extra style sheet files is of importance (e.g. the last
+# style sheet in the list overrules the setting of the previous ones in the
+# list).
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_STYLESHEET =
+
+# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the LATEX_OUTPUT output
+# directory. Note that the files will be copied as-is; there are no commands or
+# markers available.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_FILES      =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is
+# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will
+# contain links (just like the HTML output) instead of page references. This
+# makes the output suitable for online browsing using a PDF viewer.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as
+# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX
+# files. Set this option to YES, to get a higher quality PDF documentation.
+#
+# See also section LATEX_CMD_NAME for selecting the engine.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep running
+# if errors occur, instead of asking the user for help.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BATCHMODE        = NO
+
+# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the
+# index chapters (such as File Index, Compound Index, etc.) in the output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HIDE_INDICES     = NO
+
+# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
+# bibliography, e.g. plainnat, or ieeetr. See
+# https://en.wikipedia.org/wiki/BibTeX and \cite for more info.
+# The default value is: plain.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BIB_STYLE        = plain
+
+# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated
+# page will contain the date and time when the page was generated. Setting this
+# to NO can help when comparing the output of multiple runs.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_TIMESTAMP        = NO
+
+# The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute)
+# path from which the emoji images will be read. If a relative path is entered,
+# it will be relative to the LATEX_OUTPUT directory. If left blank the
+# LATEX_OUTPUT directory will be used.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EMOJI_DIRECTORY  =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES, doxygen will generate RTF output. The
+# RTF output is optimized for Word 97 and may not look too pretty with other RTF
+# readers/editors.
+# The default value is: NO.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: rtf.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES, doxygen generates more compact RTF
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will
+# contain hyperlink fields. The RTF file will contain links (just like the HTML
+# output) instead of page references. This makes the output suitable for online
+# browsing using Word or some other Word compatible readers that support those
+# fields.
+#
+# Note: WordPad (write) and others do not support links.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# configuration file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+#
+# See also section "Doxygen usage" for information on how to generate the
+# default style sheet that doxygen normally uses.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_STYLESHEET_FILE    =
+
+# Set optional variables used in the generation of an RTF document. Syntax is
+# similar to doxygen's configuration file. A template extensions file can be
+# generated using doxygen -e rtf extensionFile.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_EXTENSIONS_FILE    =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES, doxygen will generate man pages for
+# classes and files.
+# The default value is: NO.
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it. A directory man3 will be created inside the directory specified by
+# MAN_OUTPUT.
+# The default directory is: man.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to the generated
+# man pages. In case the manual section does not start with a number, the number
+# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is
+# optional.
+# The default value is: .3.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_EXTENSION          = .3
+
+# The MAN_SUBDIR tag determines the name of the directory created within
+# MAN_OUTPUT in which the man pages are placed. It defaults to man followed by
+# MAN_EXTENSION with the initial . removed.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_SUBDIR             =
+
+# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it
+# will generate one additional man file for each entity documented in the real
+# man page(s). These additional files only source the real man page, but without
+# them the man command would be unable to find the correct page.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES, doxygen will generate an XML file that
+# captures the structure of the code including all documentation.
+# The default value is: NO.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: xml.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_OUTPUT             = xml
+
+# If the XML_PROGRAMLISTING tag is set to YES, doxygen will dump the program
+# listings (including syntax highlighting and cross-referencing information) to
+# the XML output. Note that enabling this will significantly increase the size
+# of the XML output.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_PROGRAMLISTING     = YES
+
+# If the XML_NS_MEMB_FILE_SCOPE tag is set to YES, doxygen will include
+# namespace members in file scope as well, matching the HTML output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_NS_MEMB_FILE_SCOPE = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the DOCBOOK output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_DOCBOOK tag is set to YES, doxygen will generate Docbook files
+# that can be used to generate PDF.
+# The default value is: NO.
+
+GENERATE_DOCBOOK       = NO
+
+# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in
+# front of it.
+# The default directory is: docbook.
+# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
+
+DOCBOOK_OUTPUT         = docbook
+
+#---------------------------------------------------------------------------
+# Configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an
+# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures
+# the structure of the code including all documentation. Note that this feature
+# is still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES, doxygen will generate a Perl module
+# file that captures the structure of the code including all documentation.
+#
+# Note that this feature is still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES, doxygen will generate the necessary
+# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI
+# output from the Perl module output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES, the Perl module output will be nicely
+# formatted so it can be parsed by a human reader. This is useful if you want to
+# understand what is going on. On the other hand, if this tag is set to NO, the
+# size of the Perl module output will be much smaller and Perl will parse it
+# just the same.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file are
+# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful
+# so different doxyrules.make files included by the same Makefile don't
+# overwrite each other's variables.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all
+# C-preprocessor directives found in the sources and include files.
+# The default value is: YES.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names
+# in the source code. If set to NO, only conditional compilation will be
+# performed. Macro expansion can be done in a controlled way by setting
+# EXPAND_ONLY_PREDEF to YES.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+MACRO_EXPANSION        = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
+# the macro expansion is limited to the macros specified with the PREDEFINED and
+# EXPAND_AS_DEFINED tags.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_ONLY_PREDEF     = NO
+
+# If the SEARCH_INCLUDES tag is set to YES, the include files in the
+# INCLUDE_PATH will be searched if a #include is found.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by the
+# preprocessor. Note that the INCLUDE_PATH is not recursive, so the setting of
+# RECURSIVE has no effect here.
+# This tag requires that the tag SEARCH_INCLUDES is set to YES.
+
+INCLUDE_PATH           =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will be
+# used.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+INCLUDE_FILE_PATTERNS  =
+
+# The PREDEFINED tag can be used to specify one or more macro names that are
+# defined before the preprocessor is started (similar to the -D option of e.g.
+# gcc). The argument of the tag is a list of macros of the form: name or
+# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
+# is assumed. To prevent a macro definition from being undefined via #undef or
+# recursively expanded use the := operator instead of the = operator.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+PREDEFINED             =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
+# tag can be used to specify a list of macro names that should be expanded. The
+# macro definition that is found in the sources will be used. Use the PREDEFINED
+# tag if you want to use a different macro definition that overrules the
+# definition found in the source code.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_AS_DEFINED      =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
+# remove all references to function-like macros that are alone on a line, have
+# an all uppercase name, and do not end with a semicolon. Such function macros
+# are typically used for boiler-plate code, and will confuse the parser if not
+# removed.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES tag can be used to specify one or more tag files. For each tag
+# file the location of the external documentation should be added. The format of
+# a tag file without this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where loc1 and loc2 can be relative or absolute paths or URLs. See the
+# section "Linking to external documentation" for more information about the use
+# of tag files.
+# Note: Each tag file must have a unique name (where the name does NOT include
+# the path). If a tag file is not located in the directory in which doxygen is
+# run, you must also specify the path to the tagfile here.
+
+TAGFILES               =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
+# tag file that is based on the input files it reads. See section "Linking to
+# external documentation" for more information about the usage of tag files.
+
+GENERATE_TAGFILE       =
+
+# If the ALLEXTERNALS tag is set to YES, all external classes will be listed in
+# the class index. If set to NO, only the inherited external classes will be
+# listed.
+# The default value is: NO.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES, all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will be
+# listed.
+# The default value is: YES.
+
+EXTERNAL_GROUPS        = YES
+
+# If the EXTERNAL_PAGES tag is set to YES, all external pages will be listed in
+# the related pages index. If set to NO, only the current project's pages will
+# be listed.
+# The default value is: YES.
+
+EXTERNAL_PAGES         = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# You can include diagrams made with dia in doxygen documentation. Doxygen will
+# then run dia to produce the diagram and insert it in the documentation. The
+# DIA_PATH tag allows you to specify the directory where the dia binary resides.
+# If left empty dia is assumed to be found in the default search path.
+
+DIA_PATH               =
+
+# If set to YES the inheritance and collaboration graphs will hide inheritance
+# and usage relations if the target is undocumented or is not a class.
+# The default value is: YES.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz (see:
+# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
+# Bell Labs. The other options in this section have no effect if this option is
+# set to NO
+# The default value is: NO.
+
+HAVE_DOT               = NO
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed
+# to run in parallel. When set to 0 doxygen will base this on the number of
+# processors available in the system. You can set it explicitly to a value
+# larger than 0 to get control over the balance between CPU load and processing
+# speed.
+# Minimum value: 0, maximum value: 32, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_NUM_THREADS        = 0
+
+# DOT_COMMON_ATTR is common attributes for nodes, edges and labels of
+# subgraphs. When you want a differently looking font in the dot files that
+# doxygen generates you can specify fontname, fontcolor and fontsize attributes.
+# For details please see <a href=https://graphviz.org/doc/info/attrs.html>Node,
+# Edge and Graph Attributes specification</a> You need to make sure dot is able
+# to find the font, which can be done by putting it in a standard location or by
+# setting the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the
+# directory containing the font. Default graphviz fontsize is 14.
+# The default value is: fontname=Helvetica,fontsize=10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_COMMON_ATTR        = "fontname=Helvetica,fontsize=10"
+
+# DOT_EDGE_ATTR is concatenated with DOT_COMMON_ATTR. For elegant style you can
+# add 'arrowhead=open, arrowtail=open, arrowsize=0.5'. <a
+# href=https://graphviz.org/doc/info/arrows.html>Complete documentation about
+# arrows shapes.</a>
+# The default value is: labelfontname=Helvetica,labelfontsize=10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_EDGE_ATTR          = "labelfontname=Helvetica,labelfontsize=10"
+
+# DOT_NODE_ATTR is concatenated with DOT_COMMON_ATTR. For view without boxes
+# around nodes set 'shape=plain' or 'shape=plaintext' <a
+# href=https://www.graphviz.org/doc/info/shapes.html>Shapes specification</a>
+# The default value is: shape=box,height=0.2,width=0.4.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_NODE_ATTR          = "shape=box,height=0.2,width=0.4"
+
+# You can set the path where dot can find the font specified with fontname in
+# DOT_COMMON_ATTR and other dot attributes.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTPATH           =
+
+# If the CLASS_GRAPH tag is set to YES (or GRAPH) then doxygen will generate a
+# graph for each documented class showing the direct and indirect inheritance
+# relations. In case HAVE_DOT is set as well dot will be used to draw the graph,
+# otherwise the built-in generator will be used. If the CLASS_GRAPH tag is set
+# to TEXT the direct and indirect inheritance relations will be shown as texts /
+# links.
+# Possible values are: NO, YES, TEXT and GRAPH.
+# The default value is: YES.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
+# graph for each documented class showing the direct and indirect implementation
+# dependencies (inheritance, containment, and class references variables) of the
+# class with other documented classes.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
+# groups, showing the direct groups dependencies. See also the chapter Grouping
+# in the manual.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES, doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LOOK               = NO
+
+# If the UML_LOOK tag is enabled, the fields and methods are shown inside the
+# class node. If there are many fields or methods and many nodes the graph may
+# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the
+# number of items for each type to make the size more manageable. Set this to 0
+# for no limit. Note that the threshold may be exceeded by 50% before the limit
+# is enforced. So when you set the threshold to 10, up to 15 fields may appear,
+# but if the number exceeds 15, the total amount of fields shown is limited to
+# 10.
+# Minimum value: 0, maximum value: 100, default value: 10.
+# This tag requires that the tag UML_LOOK is set to YES.
+
+UML_LIMIT_NUM_FIELDS   = 10
+
+# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and
+# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS
+# tag is set to YES, doxygen will add type and arguments for attributes and
+# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen
+# will not generate fields with class member information in the UML graphs. The
+# class diagrams will look similar to the default class diagrams but using UML
+# notation for the relationships.
+# Possible values are: NO, YES and NONE.
+# The default value is: NO.
+# This tag requires that the tag UML_LOOK is set to YES.
+
+DOT_UML_DETAILS        = NO
+
+# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters
+# to display on a single line. If the actual line length exceeds this threshold
+# significantly it will be wrapped across multiple lines. Some heuristics are
+# applied to avoid ugly line breaks.
+# Minimum value: 0, maximum value: 1000, default value: 17.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_WRAP_THRESHOLD     = 17
+
+# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
+# collaboration graphs will show the relations between templates and their
+# instances.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+TEMPLATE_RELATIONS     = NO
+
+# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
+# YES then doxygen will generate a graph for each documented file showing the
+# direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDE_GRAPH          = YES
+
+# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
+# set to YES then doxygen will generate a graph for each documented file showing
+# the direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH tag is set to YES then doxygen will generate a call
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command. Disabling a call graph can be
+# accomplished by means of the command \hidecallgraph.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALL_GRAPH             = YES
+
+# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable caller graphs for selected
+# functions only using the \callergraph command. Disabling a caller graph can be
+# accomplished by means of the command \hidecallergraph.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALLER_GRAPH           = YES
+
+# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will generate a
+# graphical hierarchy of all classes instead of a textual one.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the
+# dependencies a directory has on other directories in a graphical way. The
+# dependency relations are determined by the #include relations between the
+# files in the directories.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DIRECTORY_GRAPH        = YES
+
+# The DIR_GRAPH_MAX_DEPTH tag can be used to limit the maximum number of levels
+# of child directories generated in directory dependency graphs by dot.
+# Minimum value: 1, maximum value: 25, default value: 1.
+# This tag requires that the tag DIRECTORY_GRAPH is set to YES.
+
+DIR_GRAPH_MAX_DEPTH    = 1
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. For an explanation of the image formats see the section
+# output formats in the documentation of the dot tool (Graphviz (see:
+# http://www.graphviz.org/)).
+# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
+# to make the SVG files visible in IE 9+ (other browsers do not have this
+# requirement).
+# Possible values are: png, jpg, gif, svg, png:gd, png:gd:gd, png:cairo,
+# png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and
+# png:gdiplus:gdiplus.
+# The default value is: png.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_IMAGE_FORMAT       = png
+
+# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
+# enable generation of interactive SVG images that allow zooming and panning.
+#
+# Note that this requires a modern browser other than Internet Explorer. Tested
+# and working are Firefox, Chrome, Safari, and Opera.
+# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make
+# the SVG files visible. Older versions of IE do not have SVG support.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INTERACTIVE_SVG        = NO
+
+# The DOT_PATH tag can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_PATH               =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the \dotfile
+# command).
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOTFILE_DIRS           =
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the \mscfile
+# command).
+
+MSCFILE_DIRS           =
+
+# The DIAFILE_DIRS tag can be used to specify one or more directories that
+# contain dia files that are included in the documentation (see the \diafile
+# command).
+
+DIAFILE_DIRS           =
+
+# When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the
+# path where java can find the plantuml.jar file or the filename of the jar file
+# to be used. If left blank, it is assumed PlantUML is not used or called during
+# a preprocessing step. Doxygen will generate a warning when it encounters a
+# \startuml command in this case and will not generate output for the diagram.
+
+PLANTUML_JAR_PATH      =
+
+# When using plantuml, the PLANTUML_CFG_FILE tag can be used to specify a
+# configuration file for plantuml.
+
+PLANTUML_CFG_FILE      =
+
+# When using plantuml, the specified paths are searched for files specified by
+# the !include statement in a plantuml block.
+
+PLANTUML_INCLUDE_PATH  =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes
+# that will be shown in the graph. If the number of nodes in a graph becomes
+# larger than this value, doxygen will truncate the graph, which is visualized
+# by representing a node as a red box. Note that if the number of direct
+# children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that
+# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+# Minimum value: 0, maximum value: 10000, default value: 50.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
+# generated by dot. A depth value of 3 means that only nodes reachable from the
+# root by following a path via at most 3 edges will be shown. Nodes that lie
+# further from the root node will be omitted. Note that setting this option to 1
+# or 2 may greatly reduce the computation time needed for large code bases. Also
+# note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+# Minimum value: 0, maximum value: 1000, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10) support
+# this, this feature is disabled by default.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page
+# explaining the meaning of the various boxes and arrows in the dot generated
+# graphs.
+# Note: This tag requires that UML_LOOK isn't set, i.e. the doxygen internal
+# graphical representation for inheritance and collaboration diagrams is used.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate
+# files that are used to generate the various graphs.
+#
+# Note: This setting is not only used for dot files but also for msc temporary
+# files.
+# The default value is: YES.
+
+DOT_CLEANUP            = YES
diff --git a/tools/prepare_doxygen.sh b/tools/prepare_doxygen.sh
new file mode 100755
index 000000000..68139ad67
--- /dev/null
+++ b/tools/prepare_doxygen.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+set -e
+# Installs a pinned release of the doxygen-awesome-css theme under docs/theme.
+PACKAGE_URL="https://github.com/jothepro/doxygen-awesome-css.git"
+PACKAGE_VERSION="v2.1.0"
+# Paths are anchored at the directory the script is invoked from.
+BASE_DIR=$(pwd)
+THEME_DIR="$BASE_DIR/docs/theme"
+WORKSPACE=$(mktemp -d 2> /dev/null || mktemp -d -t 'tmp')
+cleanup () {
+  EXIT_CODE=$?
+  [ -d "$WORKSPACE" ] && rm -rf "$WORKSPACE"
+  exit $EXIT_CODE
+}
+# Delete the scratch directory on exit or interruption, keeping the exit status.
+trap cleanup INT TERM EXIT
+# Clone into the scratch directory, then replace whatever theme is installed.
+cd "$WORKSPACE"
+git clone --depth=1 --branch "$PACKAGE_VERSION" "$PACKAGE_URL" theme
+rm -rf "$THEME_DIR"
+mkdir -p "$THEME_DIR"
+mv "$WORKSPACE/theme" "$THEME_DIR"
diff --git a/tools/release.py b/tools/release.py
index 8f488f67f..58442b8ab 100755
--- a/tools/release.py
+++ b/tools/release.py
@@ -135,6 +135,11 @@ def topaddedversionstring(major, minor, rev):
 
 print("modified "+cmakefile+", a backup was made")
 
+doxygenfile = maindir + os.sep + "doxygen.txt"
+
+for line in fileinput.input(doxygenfile, inplace=1, backup='.bak'):
+    line = re.sub('PROJECT_NUMBER         = "\d+\.\d+\.\d+','PROJECT_NUMBER         = "'+newversionstring, line.rstrip())
+    print(line)
 
 print("Please run the tests before issuing a release: "+scriptlocation + "/prereleasetests.sh \n")
 print("to issue release, enter \n git commit -a \n git push \n git tag -a v"+toversionstring(*newversion)+" -m \"version "+toversionstring(*newversion)+"\"\n git push --tags \n")

From 48a6d8786e90ed22b9e99ee047ceb1a461484111 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 30 Jan 2023 13:40:01 -0500
Subject: [PATCH 057/162] Fixing.

---
 tools/prepare_doxygen.sh | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100755 tools/prepare_doxygen.sh

diff --git a/tools/prepare_doxygen.sh b/tools/prepare_doxygen.sh
new file mode 100755
index 000000000..7391ab45f
--- /dev/null
+++ b/tools/prepare_doxygen.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+set -e
+# Fetch a pinned release of the doxygen-awesome-css theme into "$THEME_DIR".
+PACKAGE_URL="https://github.com/jothepro/doxygen-awesome-css.git"
+PACKAGE_VERSION="v2.1.0"
+
+BASE_DIR=$(pwd)
+THEME_DIR="$BASE_DIR/docs/theme"
+WORKSPACE=$(mktemp -d 2> /dev/null || mktemp -d -t 'tmp')
+cleanup () {
+  EXIT_CODE=$?
+  [ -d "$WORKSPACE" ] && rm -rf "$WORKSPACE"
+  exit $EXIT_CODE
+}
+# Delete the temporary workspace on exit or on INT/TERM, keeping the exit status.
+trap cleanup INT TERM EXIT
+# NOTE(review): "$THEME_DIR" exists before the mv, so the clone may land in "$THEME_DIR/theme" - confirm.
+cd "$WORKSPACE"
+git clone --depth=1 --branch "$PACKAGE_VERSION" "$PACKAGE_URL" theme
+rm -rf "$THEME_DIR"
+mkdir -p "$THEME_DIR"
+mv "$WORKSPACE/theme" "$THEME_DIR"

From 32306113514528500d615357132f369e8dd0c8f5 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 30 Jan 2023 14:33:46 -0500
Subject: [PATCH 058/162] [skip ci] minor tweaks

---
 doxygen | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doxygen b/doxygen
index 768017761..fb0d7b70e 100644
--- a/doxygen
+++ b/doxygen
@@ -917,7 +917,7 @@ WARN_LOGFILE           =
 # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
 # Note: If this tag is empty the current directory is searched.
 
-INPUT                  = cpp/roaring.hh cpp/roaring64map.hh
+INPUT                  = README.md cpp/roaring.hh cpp/roaring64map.hh
 
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses

From 67d9142b715e919a153d04a734c88a964018c4de Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 30 Jan 2023 14:39:23 -0500
Subject: [PATCH 059/162] minor glitch

---
 .github/workflows/documentation.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index 6d59fa4c8..4089972cc 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -24,11 +24,11 @@ jobs:
         run: sudo apt-get install doxygen graphviz -y
       - run: mkdir docs
       - name: Install theme
-        run: ./tools/prepare-doxygen.sh
+        run: ./tools/prepare_doxygen.sh
       - name: Generate Doxygen Documentation
         run: doxygen ./doxygen
       - name: Deploy to GitHub Pages
         uses: peaceiris/actions-gh-pages@v3
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
-          publish_dir: docs/html
\ No newline at end of file
+          publish_dir: docs/html

From 5f64aba8303815cc274b152d7745f7bbeb1080fe Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 30 Jan 2023 14:59:58 -0500
Subject: [PATCH 060/162] [skip-ci] doc update

---
 doxygen                  | 6 +++---
 tools/prepare_doxygen.sh | 3 +--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/doxygen b/doxygen
index fb0d7b70e..e8b89a90f 100644
--- a/doxygen
+++ b/doxygen
@@ -1330,8 +1330,8 @@ HTML_STYLESHEET        =
 # documentation.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-HTML_EXTRA_STYLESHEET  = docs/theme/doxygen-awesome.css \
-                         docs/theme/doxygen-awesome-sidebar-only.css
+HTML_EXTRA_STYLESHEET  = theme/doxygen-awesome.css \
+                         theme/doxygen-awesome-sidebar-only.css
 
 # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
 # other source files which should be copied to the HTML output directory. Note
@@ -1341,7 +1341,7 @@ HTML_EXTRA_STYLESHEET  = docs/theme/doxygen-awesome.css \
 # files will be copied as-is; there are no commands or markers available.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-HTML_EXTRA_FILES       = docs/theme/doxygen-awesome-darkmode-toggle.js
+HTML_EXTRA_FILES       = theme/doxygen-awesome-darkmode-toggle.js
 
 # The HTML_COLORSTYLE tag can be used to specify if the generated HTML output
 # should be rendered with a dark or light theme.
diff --git a/tools/prepare_doxygen.sh b/tools/prepare_doxygen.sh
index 7391ab45f..f04ff628c 100755
--- a/tools/prepare_doxygen.sh
+++ b/tools/prepare_doxygen.sh
@@ -5,7 +5,7 @@ PACKAGE_URL="https://github.com/jothepro/doxygen-awesome-css.git"
 PACKAGE_VERSION="v2.1.0"
 
 BASE_DIR=$(pwd)
-THEME_DIR="$BASE_DIR/docs/theme"
+THEME_DIR="$BASE_DIR/theme"
 WORKSPACE=$(mktemp -d 2> /dev/null || mktemp -d -t 'tmp')
 cleanup () {
   EXIT_CODE=$?
@@ -18,5 +18,4 @@ trap cleanup INT TERM EXIT
 cd "$WORKSPACE"
 git clone --depth=1 --branch "$PACKAGE_VERSION" "$PACKAGE_URL" theme
 rm -rf "$THEME_DIR"
-mkdir -p "$THEME_DIR"
 mv "$WORKSPACE/theme" "$THEME_DIR"

From 5526119f0535167a6afd63c63112557bf0aa30e9 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 30 Jan 2023 15:13:42 -0500
Subject: [PATCH 061/162] Update documentation.yml

---
 .github/workflows/documentation.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index 4089972cc..54e515a6f 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -3,7 +3,7 @@ name: Doxygen GitHub Pages
 on:
   push:
     branches:
-      - main
+      - master
   # Allows you to run this workflow manually from the Actions tab
   workflow_dispatch:
 

From b31a6ad1b28d622bf82aefcfda8b302477d8a730 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 30 Jan 2023 16:08:55 -0500
Subject: [PATCH 062/162] Minor reorg of the readme. (#426)

---
 README.md | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 30488508a..68b8ad68e 100644
--- a/README.md
+++ b/README.md
@@ -609,8 +609,9 @@ later `roaring_bitmap_or_inplace` will be very fast.
 
 You should benchmark these alternatives on your own data to decide what is best.
 
-# Python Wrapper
+# Wrappers
 
+## Python
 Tom Cornebize wrote a Python wrapper available at https://github.com/Ezibenroc/PyRoaringBitMap
 Installing it is as easy as typing...
 
@@ -618,7 +619,7 @@ Installing it is as easy as typing...
 pip install pyroaring
 ```
 
-# JavaScript Wrapper
+## JavaScript
 
 Salvatore Previti  wrote a Node/JavaScript wrapper available at https://github.com/SalvatorePreviti/roaring-node
 Installing it is as easy as typing...
@@ -627,33 +628,33 @@ Installing it is as easy as typing...
 npm install roaring
 ```
 
-# Swift Wrapper
+## Swift
 
 Jérémie Piotte wrote a [Swift wrapper](https://github.com/RoaringBitmap/SwiftRoaring).
 
 
-# C# Wrapper
+## C#
 
 Brandon Smith wrote a C# wrapper available at https://github.com/RogueException/CRoaring.Net (works for Windows and Linux under x64 processors)
 
 
-# Go (golang) Wrapper
+## Go (golang)
 
 There is a Go (golang) wrapper available at https://github.com/RoaringBitmap/gocroaring
 
-# Rust Wrapper
+## Rust
 
 Saulius Grigaliunas wrote a Rust wrapper available at https://github.com/saulius/croaring-rs
 
-# D Wrapper
+## D
 
 Yuce Tekol wrote a D wrapper available at https://github.com/yuce/droaring
 
-# Redis Module
+## Redis
 
 Antonio Guilherme Ferreira Viggiano wrote a Redis Module available at https://github.com/aviggiano/redis-roaring
 
-# Zig Wrapper
+## Zig
 
 Justin Whear wrote a Zig wrapper available at https://github.com/jwhear/roaring-zig
 

From e74691b298f517d50aae9abadf35b3a16ea17e0c Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 30 Jan 2023 16:50:28 -0500
Subject: [PATCH 063/162] Update README.md

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index 68b8ad68e..bc8ed585d 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,9 @@
 # CRoaring [![Ubuntu-CI](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml/badge.svg)](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml)
 
+[![Doxygen Documentation](https://img.shields.io/badge/docs-doxygen-green.svg)](http://roaringbitmap.github.io/CRoaring/)
+
+
+
 Portable Roaring bitmaps in C (and C++) with full support for your favorite compiler (GNU GCC, LLVM's clang, Visual Studio). Included in the [Awesome C](https://github.com/kozross/awesome-c) list of open source C software.
 
 # Introduction

From 8a2ffb5d96b91d965adacf30f2da3d4f0e793c6a Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 30 Jan 2023 16:52:54 -0500
Subject: [PATCH 064/162] Tweaking.

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index bc8ed585d..3879b65d4 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,6 @@
-# CRoaring [![Ubuntu-CI](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml/badge.svg)](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml)
+# CRoaring 
+
+[![Ubuntu-CI](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml/badge.svg)](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml)
 
 [![Doxygen Documentation](https://img.shields.io/badge/docs-doxygen-green.svg)](http://roaringbitmap.github.io/CRoaring/)
 

From 3d66c0d4dbd1334ade2204b0328cbb3ab4202b79 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 30 Jan 2023 16:53:45 -0500
Subject: [PATCH 065/162] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 3879b65d4..f24f88262 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # CRoaring 
 
-[![Ubuntu-CI](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml/badge.svg)](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml)
+[![Ubuntu-CI](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml/badge.svg)](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml) [![VS17-CI](https://github.com/RoaringBitmap/CRoaring/actions/workflows/vs17-ci.yml/badge.svg)](https://github.com/RoaringBitmap/CRoaring/actions/workflows/vs17-ci.yml)
 
 [![Doxygen Documentation](https://img.shields.io/badge/docs-doxygen-green.svg)](http://roaringbitmap.github.io/CRoaring/)
 

From f225e039b0a6e25343f0c39da4cc34c936f76967 Mon Sep 17 00:00:00 2001
From: Salvatore Previti <roorback@gmail.com>
Date: Tue, 31 Jan 2023 21:13:29 +0000
Subject: [PATCH 066/162] fix use of arena_alloc for C++ compilers when
 roaring.c is compiled as C++ source (for example in roaring-node) (#427)

---
 src/roaring.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/roaring.c b/src/roaring.c
index 64291a639..3d3d797f8 100644
--- a/src/roaring.c
+++ b/src/roaring.c
@@ -3281,8 +3281,8 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
         (container_t **)arena_alloc(&arena,
                                     sizeof(container_t*) * num_containers);
 
-    uint16_t *keys = arena_alloc(&arena, num_containers * sizeof(uint16_t));
-    uint8_t *typecodes = arena_alloc(&arena, num_containers * sizeof(uint8_t));
+    uint16_t *keys = (uint16_t *)arena_alloc(&arena, num_containers * sizeof(uint16_t));
+    uint8_t *typecodes = (uint8_t *)arena_alloc(&arena, num_containers * sizeof(uint8_t));
 
     rb->high_low_container.keys = keys;
     rb->high_low_container.typecodes = typecodes;
@@ -3304,7 +3304,7 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
 
         if (isbitmap) {
             typecodes[i] = BITSET_CONTAINER_TYPE;
-            bitset_container_t *c = arena_alloc(&arena, sizeof(bitset_container_t));
+            bitset_container_t *c = (bitset_container_t *)arena_alloc(&arena, sizeof(bitset_container_t));
             c->cardinality = cardinality;
             if(offset_headers != NULL) {
                 c->words = (uint64_t *) (start_of_buf + offset_headers[i]);
@@ -3315,7 +3315,7 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
             rb->high_low_container.containers[i] = c;
         } else if (isrun) {
             typecodes[i] = RUN_CONTAINER_TYPE;
-            run_container_t *c = arena_alloc(&arena, sizeof(run_container_t));
+            run_container_t *c = (run_container_t *)arena_alloc(&arena, sizeof(run_container_t));
             c->capacity = cardinality;
             uint16_t n_runs;
             if(offset_headers != NULL) {
@@ -3332,7 +3332,7 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
             rb->high_low_container.containers[i] = c;
         } else {
             typecodes[i] = ARRAY_CONTAINER_TYPE;
-            array_container_t *c = arena_alloc(&arena, sizeof(array_container_t));
+            array_container_t *c = (array_container_t *)arena_alloc(&arena, sizeof(array_container_t));
             c->cardinality = cardinality;
             c->capacity = cardinality;
             if(offset_headers != NULL) {

From ebd9a1757e978934c4e2d524cee104b1bea1324d Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 31 Jan 2023 16:17:27 -0500
Subject: [PATCH 067/162] Preparing the release

---
 CMakeLists.txt                    | 4 ++--
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 4 ++--
 tools/release.py                  | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 210825065..d8696c085 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 0)
 set(PROJECT_VERSION_MINOR 9)
-set(PROJECT_VERSION_PATCH 0)
-set(ROARING_LIB_VERSION "0.9.0" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 1)
+set(ROARING_LIB_VERSION "0.9.1" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "7" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/doxygen b/doxygen
index e8b89a90f..0ffd5d7e5 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "0.9.0"
+PROJECT_NUMBER         = "0.9.1"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index b7ad25e23..b90fe665b 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.9.0"
+#define ROARING_VERSION "0.9.1"
 enum { 
     ROARING_VERSION_MAJOR = 0,
     ROARING_VERSION_MINOR = 9,
-    ROARING_VERSION_REVISION = 0
+    ROARING_VERSION_REVISION = 1
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 
diff --git a/tools/release.py b/tools/release.py
index 58442b8ab..5d4ff71c6 100755
--- a/tools/release.py
+++ b/tools/release.py
@@ -135,7 +135,7 @@ def topaddedversionstring(major, minor, rev):
 
 print("modified "+cmakefile+", a backup was made")
 
-doxygenfile = maindir + os.sep + "doxygen.txt"
+doxygenfile = maindir + os.sep + "doxygen"
 
 for line in fileinput.input(doxygenfile, inplace=1, backup='.bak'):
     line = re.sub('PROJECT_NUMBER         = "\d+\.\d+\.\d+','PROJECT_NUMBER         = "'+newversionstring, line.rstrip())

From 20f3e46ac7803477fbfe3860ebcc31eb23406957 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Wed, 1 Feb 2023 21:43:17 -0500
Subject: [PATCH 068/162] Allowing unaligned SIMD accesses.

---
 include/roaring/roaring.h | 2 +-
 src/array_util.c          | 8 ++++----
 src/bitset_util.c         | 8 ++++----
 src/containers/bitset.c   | 4 ++--
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h
index e4a732136..9919e792f 100644
--- a/include/roaring/roaring.h
+++ b/include/roaring/roaring.h
@@ -541,7 +541,7 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
  *
  * The function is unsafe in the following ways:
  * 1) It may execute unaligned memory accesses.
- * 2) A buffer overflow may occure if buf does not point to a valid serialized
+ * 2) A buffer overflow may occur if buf does not point to a valid serialized
  *    bitmap.
  *
  * This is meant to be compatible with the Java and Go versions:
diff --git a/src/array_util.c b/src/array_util.c
index 48349105a..1daed41b6 100644
--- a/src/array_util.c
+++ b/src/array_util.c
@@ -385,7 +385,7 @@ int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
                 v_b, vectorlength, v_a, vectorlength,
                 _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
             const int r = _mm_extract_epi32(res_v, 0);
-            __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + r);
+            __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);
             __m128i p = _mm_shuffle_epi8(v_a, sm16);
             _mm_storeu_si128((__m128i *)&C[count], p);  // can overflow
             count += _mm_popcnt_u32(r);
@@ -409,7 +409,7 @@ int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
                     _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
                 const int r = _mm_extract_epi32(res_v, 0);
                 __m128i sm16 =
-                    _mm_load_si128((const __m128i *)shuffle_mask16 + r);
+                    _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);
                 __m128i p = _mm_shuffle_epi8(v_a, sm16);
                 _mm_storeu_si128((__m128i *)&C[count], p);  // can overflow
                 count += _mm_popcnt_u32(r);
@@ -586,7 +586,7 @@ int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
                 const int bitmask_belongs_to_difference =
                     _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF;
                 /*** next few lines are probably expensive *****/
-                __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 +
+                __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 +
                                               bitmask_belongs_to_difference);
                 __m128i p = _mm_shuffle_epi8(v_a, sm16);
                 _mm_storeu_si128((__m128i *)&C[count], p);  // can overflow
@@ -621,7 +621,7 @@ int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
                 _mm_or_si128(runningmask_a_found_in_b, a_found_in_b);
             const int bitmask_belongs_to_difference =
                 _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF;
-            __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 +
+            __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 +
                                           bitmask_belongs_to_difference);
             __m128i p = _mm_shuffle_epi8(v_a, sm16);
             _mm_storeu_si128((__m128i *)&C[count], p);  // can overflow
diff --git a/src/bitset_util.c b/src/bitset_util.c
index 10d3d6cb2..6e3c0d7ba 100644
--- a/src/bitset_util.c
+++ b/src/bitset_util.c
@@ -573,9 +573,9 @@ size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
                 uint8_t byteB = (uint8_t)(w >> 8);
                 w >>= 16;
                 __m256i vecA =
-                    _mm256_load_si256((const __m256i *)vecDecodeTable[byteA]);
+                    _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteA]);
                 __m256i vecB =
-                    _mm256_load_si256((const __m256i *)vecDecodeTable[byteB]);
+                    _mm256_loadu_si256((const __m256i *)vecDecodeTable[byteB]);
                 uint8_t advanceA = lengthTable[byteA];
                 uint8_t advanceB = lengthTable[byteB];
                 vecA = _mm256_add_epi32(baseVec, vecA);
@@ -678,9 +678,9 @@ size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length,
                 uint8_t byteA = (uint8_t)w;
                 uint8_t byteB = (uint8_t)(w >> 8);
                 w >>= 16;
-                __m128i vecA = _mm_load_si128(
+                __m128i vecA = _mm_loadu_si128(
                     (const __m128i *)vecDecodeTable_uint16[byteA]);
-                __m128i vecB = _mm_load_si128(
+                __m128i vecB = _mm_loadu_si128(
                     (const __m128i *)vecDecodeTable_uint16[byteB]);
                 uint8_t advanceA = lengthTable[byteA];
                 uint8_t advanceB = lengthTable[byteB];
diff --git a/src/containers/bitset.c b/src/containers/bitset.c
index 25248e00e..4eb21dd70 100644
--- a/src/containers/bitset.c
+++ b/src/containers/bitset.c
@@ -822,8 +822,8 @@ static inline bool _avx2_bitset_container_equals(const bitset_container_t *conta
     const __m256i *ptr1 = (const __m256i*)container1->words;
     const __m256i *ptr2 = (const __m256i*)container2->words;
     for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)/32; i++) {
-      __m256i r1 = _mm256_load_si256(ptr1+i);
-      __m256i r2 = _mm256_load_si256(ptr2+i);
+      __m256i r1 = _mm256_loadu_si256(ptr1+i);
+      __m256i r2 = _mm256_loadu_si256(ptr2+i);
       int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2));
       if ((uint32_t)mask != UINT32_MAX) {
           return false;

From 0c1fd6bb494eb1601338bdc544dd4b9a9f17c9c6 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 2 Feb 2023 14:45:39 -0500
Subject: [PATCH 069/162] New release.

---
 CMakeLists.txt                    | 4 ++--
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d8696c085..be2e657d8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 0)
 set(PROJECT_VERSION_MINOR 9)
-set(PROJECT_VERSION_PATCH 1)
-set(ROARING_LIB_VERSION "0.9.1" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 2)
+set(ROARING_LIB_VERSION "0.9.2" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "7" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/doxygen b/doxygen
index 0ffd5d7e5..9a99cacd9 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "0.9.1"
+PROJECT_NUMBER         = "0.9.2"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index b90fe665b..6f98d98d9 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.9.1"
+#define ROARING_VERSION "0.9.2"
 enum { 
     ROARING_VERSION_MAJOR = 0,
     ROARING_VERSION_MINOR = 9,
-    ROARING_VERSION_REVISION = 1
+    ROARING_VERSION_REVISION = 2
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 9e47d6254c36691a75ded519707a751fd84a6767 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Fri, 3 Feb 2023 17:52:53 -0500
Subject: [PATCH 070/162] Fix memory leak. (#430)

---
 include/roaring/containers/containers.h |  2 +-
 tests/toplevel_unit.c                   | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/include/roaring/containers/containers.h b/include/roaring/containers/containers.h
index ad78515d1..782bf1370 100644
--- a/include/roaring/containers/containers.h
+++ b/include/roaring/containers/containers.h
@@ -1179,7 +1179,7 @@ static inline container_t *container_lazy_or(
                                 CAST_run(result));
             *result_type = RUN_CONTAINER_TYPE;
             // we are being lazy
-            result = convert_run_to_efficient_container(
+            result = convert_run_to_efficient_container_and_free(
                 CAST_run(result), result_type);
             return result;
 
diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index 1a1e8ab17..b0472839d 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -47,6 +47,18 @@ bool roaring_iterator_sumall(uint32_t value, void *param) {
     return true;  // continue till the end
 }
 
+DEFINE_TEST(issue429) {
+  // Issue 429 regression: only checks for a memory leak, so the result is not inspected.
+  roaring_bitmap_t *b1 = roaring_bitmap_create();
+  roaring_bitmap_add_range(b1, 0, 100);
+  roaring_bitmap_remove_range(b1, 0, 99);
+  roaring_bitmap_t *b2 = roaring_bitmap_copy(b1);
+  const roaring_bitmap_t *bitmaps[] = {b1, b2};
+  roaring_bitmap_t *result = roaring_bitmap_or_many_heap(2, bitmaps);
+  roaring_bitmap_free(result);
+  roaring_bitmap_free(b2);
+  roaring_bitmap_free(b1);
+}
 
 DEFINE_TEST(range_contains) {
     uint32_t end = 2073952257;
@@ -4345,6 +4357,7 @@ int main() {
     tellmeall();
 
     const struct CMUnitTest tests[] = {
+        cmocka_unit_test(issue429),
         cmocka_unit_test(test_contains_range_PyRoaringBitMap_issue81),
         cmocka_unit_test(issue316),
         cmocka_unit_test(issue288),

From 101a0cac6bb29f38590b4c3a9371f4b156437b15 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Fri, 3 Feb 2023 18:41:11 -0500
Subject: [PATCH 071/162] New release.

---
 CMakeLists.txt                    | 4 ++--
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index be2e657d8..570227228 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 0)
 set(PROJECT_VERSION_MINOR 9)
-set(PROJECT_VERSION_PATCH 2)
-set(ROARING_LIB_VERSION "0.9.2" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 3)
+set(ROARING_LIB_VERSION "0.9.3" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "7" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/doxygen b/doxygen
index 9a99cacd9..5654511f1 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "0.9.2"
+PROJECT_NUMBER         = "0.9.3"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 6f98d98d9..bc71c2cab 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.9.2"
+#define ROARING_VERSION "0.9.3"
 enum { 
     ROARING_VERSION_MAJOR = 0,
     ROARING_VERSION_MINOR = 9,
-    ROARING_VERSION_REVISION = 2
+    ROARING_VERSION_REVISION = 3
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 23a1b3687fe02af17d554a49fb8ded562d681b42 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 6 Feb 2023 08:52:07 -0500
Subject: [PATCH 072/162] Tweak.

---
 doxygen | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/doxygen b/doxygen
index 5654511f1..efc80ee41 100644
--- a/doxygen
+++ b/doxygen
@@ -1331,7 +1331,8 @@ HTML_STYLESHEET        =
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
 HTML_EXTRA_STYLESHEET  = theme/doxygen-awesome.css \
-                         theme/doxygen-awesome-sidebar-only.css
+                         theme/doxygen-awesome-sidebar-only.css \
+                         theme/doxygen-awesome-sidebar-only-darkmode-toggle.css
 
 # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
 # other source files which should be copied to the HTML output directory. Note
@@ -1341,7 +1342,10 @@ HTML_EXTRA_STYLESHEET  = theme/doxygen-awesome.css \
 # files will be copied as-is; there are no commands or markers available.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-HTML_EXTRA_FILES       = theme/doxygen-awesome-darkmode-toggle.js
+HTML_EXTRA_FILES       = theme/doxygen-awesome-darkmode-toggle.js \
+                         theme/doxygen-awesome-interactive-toc.js \
+                         theme/doxygen-awesome-fragment-copy-button.js \
+                         theme/doxygen-awesome-paragraph-link.js
 
 # The HTML_COLORSTYLE tag can be used to specify if the generated HTML output
 # should be rendered with a dark or light theme.

From 2e026d8d015cc3eb982139492f54bdae6af747b6 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 7 Feb 2023 10:40:46 -0500
Subject: [PATCH 073/162] Verifying and testing issue 431. (#432)

---
 src/containers/mixed_subset.c |  2 +-
 tests/toplevel_unit.c         | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/containers/mixed_subset.c b/src/containers/mixed_subset.c
index af6f03e79..86fe098f5 100644
--- a/src/containers/mixed_subset.c
+++ b/src/containers/mixed_subset.c
@@ -32,7 +32,7 @@ bool run_container_is_subset_array(const run_container_t* container1,
                                  container2->cardinality, start);
         stop_pos = advanceUntil(container2->array, stop_pos,
                                 container2->cardinality, stop);
-        if (start_pos == container2->cardinality) {
+        if (stop_pos == container2->cardinality) {
             return false;
         } else if (stop_pos - start_pos != stop - start ||
                    container2->array[start_pos] != start ||
diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index b0472839d..cd1a42137 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -60,6 +60,19 @@ DEFINE_TEST(issue429) {
   roaring_bitmap_free(b1);
 }
 
+
+DEFINE_TEST(issue431) {
+  // This is a memory access test, so we don't need to check the results.
+  roaring_bitmap_t *b1 = roaring_bitmap_create();
+  roaring_bitmap_add(b1, 100);
+  roaring_bitmap_flip_inplace(b1, 0, 100 + 1);
+  roaring_bitmap_t *b2 = roaring_bitmap_create();
+  roaring_bitmap_add_range(b2, 50, 100 + 1);
+  roaring_bitmap_is_subset(b2, b1);
+  roaring_bitmap_free(b2);
+  roaring_bitmap_free(b1);
+}
+
 DEFINE_TEST(range_contains) {
     uint32_t end = 2073952257;
     uint32_t start = end-2;
@@ -4358,6 +4371,7 @@ int main() {
 
     const struct CMUnitTest tests[] = {
         cmocka_unit_test(issue429),
+        cmocka_unit_test(issue431),
         cmocka_unit_test(test_contains_range_PyRoaringBitMap_issue81),
         cmocka_unit_test(issue316),
         cmocka_unit_test(issue288),

From dd00cd8083fbe29a24109d5e79bd66155fbfad3a Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 7 Feb 2023 10:41:52 -0500
Subject: [PATCH 074/162] Preparing new release

---
 CMakeLists.txt                    | 4 ++--
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 570227228..e4903943e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 0)
 set(PROJECT_VERSION_MINOR 9)
-set(PROJECT_VERSION_PATCH 3)
-set(ROARING_LIB_VERSION "0.9.3" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 4)
+set(ROARING_LIB_VERSION "0.9.4" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "7" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/doxygen b/doxygen
index efc80ee41..4286e1a9f 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "0.9.3"
+PROJECT_NUMBER         = "0.9.4"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index bc71c2cab..36174e1de 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.9.3"
+#define ROARING_VERSION "0.9.4"
 enum { 
     ROARING_VERSION_MAJOR = 0,
     ROARING_VERSION_MINOR = 9,
-    ROARING_VERSION_REVISION = 3
+    ROARING_VERSION_REVISION = 4
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From e52bd3465359d9e6ba6dfd77c5e613ab083fa66f Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 9 Feb 2023 16:35:43 -0500
Subject: [PATCH 075/162] Fix for issue 433 (#434)

---
 include/roaring/containers/containers.h |  2 +-
 tests/toplevel_unit.c                   | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/include/roaring/containers/containers.h b/include/roaring/containers/containers.h
index 782bf1370..2f7c1e4d7 100644
--- a/include/roaring/containers/containers.h
+++ b/include/roaring/containers/containers.h
@@ -2430,7 +2430,7 @@ static inline container_t *container_remove_range(
 
             if (result_cardinality == 0) {
                 return NULL;
-            } else if (result_cardinality < DEFAULT_MAX_SIZE) {
+            } else if (result_cardinality <= DEFAULT_MAX_SIZE) {
                 *result_type = ARRAY_CONTAINER_TYPE;
                 bitset_reset_range(bitset->words, min, max+1);
                 bitset->cardinality = result_cardinality;
diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index cd1a42137..a1285d548 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -73,6 +73,21 @@ DEFINE_TEST(issue431) {
   roaring_bitmap_free(b1);
 }
 
+DEFINE_TEST(issue433) {
+  roaring_bitmap_t *b1 = roaring_bitmap_create();
+  roaring_bitmap_add(b1, 262143);
+  roaring_bitmap_add_range_closed(b1, 258047, 262143);
+  roaring_bitmap_remove_range_closed(b1, 262143, 262143);
+  size_t len = roaring_bitmap_portable_size_in_bytes(b1);
+  char *data = roaring_malloc(len);
+  roaring_bitmap_portable_serialize(b1, data);
+  roaring_bitmap_t *b2 = roaring_bitmap_portable_deserialize_safe(data, len);
+  assert_true(roaring_bitmap_equals(b1, b2));
+  roaring_bitmap_free(b2);
+  roaring_bitmap_free(b1);
+  roaring_free(data);
+}
+
 DEFINE_TEST(range_contains) {
     uint32_t end = 2073952257;
     uint32_t start = end-2;
@@ -4370,6 +4385,7 @@ int main() {
     tellmeall();
 
     const struct CMUnitTest tests[] = {
+        cmocka_unit_test(issue433),
         cmocka_unit_test(issue429),
         cmocka_unit_test(issue431),
         cmocka_unit_test(test_contains_range_PyRoaringBitMap_issue81),

From b729ef156412bfeb018968fbe7ded51d00790c07 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 9 Feb 2023 16:36:34 -0500
Subject: [PATCH 076/162] Issuing release

---
 CMakeLists.txt                    | 4 ++--
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e4903943e..1d1135f64 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 0)
 set(PROJECT_VERSION_MINOR 9)
-set(PROJECT_VERSION_PATCH 4)
-set(ROARING_LIB_VERSION "0.9.4" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 5)
+set(ROARING_LIB_VERSION "0.9.5" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "7" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/doxygen b/doxygen
index 4286e1a9f..3511661ec 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "0.9.4"
+PROJECT_NUMBER         = "0.9.5"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 36174e1de..52d70abae 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.9.4"
+#define ROARING_VERSION "0.9.5"
 enum { 
     ROARING_VERSION_MAJOR = 0,
     ROARING_VERSION_MINOR = 9,
-    ROARING_VERSION_REVISION = 4
+    ROARING_VERSION_REVISION = 5
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 2fb8622a788995169a4838151f67b0e8ea502fce Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Sat, 11 Feb 2023 17:18:39 -0500
Subject: [PATCH 077/162] Verifies and fixes issue 436 (#437)

* Adding test.

* Fixing bug.
---
 include/roaring/containers/containers.h | 10 +---------
 tests/toplevel_unit.c                   | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/include/roaring/containers/containers.h b/include/roaring/containers/containers.h
index 2f7c1e4d7..47001da0b 100644
--- a/include/roaring/containers/containers.h
+++ b/include/roaring/containers/containers.h
@@ -2469,15 +2469,7 @@ static inline container_t *container_remove_range(
             }
 
             run_container_remove_range(run, min, max);
-
-            if (run_container_serialized_size_in_bytes(run->n_runs) <=
-                    bitset_container_serialized_size_in_bytes()) {
-                *result_type = RUN_CONTAINER_TYPE;
-                return run;
-            } else {
-                *result_type = BITSET_CONTAINER_TYPE;
-                return bitset_container_from_run(run);
-            }
+            return convert_run_to_efficient_container(run, result_type);
         }
         default:
             __builtin_unreachable();
diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index a1285d548..c1765a9a4 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -88,6 +88,26 @@ DEFINE_TEST(issue433) {
   roaring_free(data);
 }
 
+
+
+DEFINE_TEST(issue436) {
+  roaring_bitmap_t *b1 = roaring_bitmap_create();
+  roaring_bitmap_add_range_closed(b1, 19711, 262068);
+  for (int i = 0; i < 0x10000; i += 2) {
+    roaring_bitmap_add(b1, i);
+  }
+  roaring_bitmap_printf_describe(b1);
+  roaring_bitmap_remove_range_closed(b1, 6143, 65505);
+  size_t len = roaring_bitmap_portable_size_in_bytes(b1);
+  char *data = roaring_malloc(len);
+  roaring_bitmap_portable_serialize(b1, data);
+  roaring_bitmap_t *b2 = roaring_bitmap_portable_deserialize_safe(data, len);
+  assert_true(roaring_bitmap_equals(b1, b2));
+  roaring_bitmap_free(b2);
+  roaring_bitmap_free(b1);
+  roaring_free(data);
+}
+
 DEFINE_TEST(range_contains) {
     uint32_t end = 2073952257;
     uint32_t start = end-2;
@@ -4385,6 +4405,7 @@ int main() {
     tellmeall();
 
     const struct CMUnitTest tests[] = {
+        cmocka_unit_test(issue436),
         cmocka_unit_test(issue433),
         cmocka_unit_test(issue429),
         cmocka_unit_test(issue431),

From f2adf9aaa57e2e6b001eee8383312311b644772d Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Sat, 11 Feb 2023 17:19:10 -0500
Subject: [PATCH 078/162] Patch release

---
 CMakeLists.txt                    | 4 ++--
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1d1135f64..a8190197d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 0)
 set(PROJECT_VERSION_MINOR 9)
-set(PROJECT_VERSION_PATCH 5)
-set(ROARING_LIB_VERSION "0.9.5" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 6)
+set(ROARING_LIB_VERSION "0.9.6" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "7" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/doxygen b/doxygen
index 3511661ec..c7a68db30 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "0.9.5"
+PROJECT_NUMBER         = "0.9.6"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 52d70abae..2b0daa06a 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.9.5"
+#define ROARING_VERSION "0.9.6"
 enum { 
     ROARING_VERSION_MAJOR = 0,
     ROARING_VERSION_MINOR = 9,
-    ROARING_VERSION_REVISION = 5
+    ROARING_VERSION_REVISION = 6
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 88d837dc86d82fe77222d6b59ce4bc1d42962e75 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Wed, 15 Feb 2023 09:52:42 -0500
Subject: [PATCH 079/162] Using a C++ header for C include.

---
 cpp/roaring64map.hh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index 1b0b97a53..bdc8ba776 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -9,8 +9,8 @@
 #ifndef INCLUDE_ROARING_64_MAP_HH_
 #define INCLUDE_ROARING_64_MAP_HH_
 
-#include <inttypes.h>
 #include <algorithm>
+#include <cinttypes> // PRIu64 macro
 #include <cstdarg>  // for va_list handling in bitmapOf()
 #include <cstdio>  // for std::printf() in the printf() method
 #include <cstring>  // for std::memcpy()

From 59c59dceac26d1cae3c305fe9fdf640b25838ba5 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 16 Feb 2023 08:57:56 -0500
Subject: [PATCH 080/162] Patch release

---
 CMakeLists.txt                    | 4 ++--
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a8190197d..74303c3cd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 0)
 set(PROJECT_VERSION_MINOR 9)
-set(PROJECT_VERSION_PATCH 6)
-set(ROARING_LIB_VERSION "0.9.6" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 7)
+set(ROARING_LIB_VERSION "0.9.7" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "7" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/doxygen b/doxygen
index c7a68db30..39a479a9f 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "0.9.6"
+PROJECT_NUMBER         = "0.9.7"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 2b0daa06a..13778bae2 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.9.6"
+#define ROARING_VERSION "0.9.7"
 enum { 
     ROARING_VERSION_MAJOR = 0,
     ROARING_VERSION_MINOR = 9,
-    ROARING_VERSION_REVISION = 6
+    ROARING_VERSION_REVISION = 7
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From d1cb7cd29bc33580922abd3c4a63a90bac18a12d Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 16 Feb 2023 12:15:28 -0500
Subject: [PATCH 081/162] Verifying and fixing issue 440. (#441)

---
 src/roaring.c         |  3 +--
 tests/toplevel_unit.c | 11 +++++++++++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/roaring.c b/src/roaring.c
index 3d3d797f8..d75fb5cb7 100644
--- a/src/roaring.c
+++ b/src/roaring.c
@@ -2879,8 +2879,7 @@ bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_sta
     }
     int32_t is = ra_get_index(&r->high_low_container, hb_rs);
     int32_t ie = ra_get_index(&r->high_low_container, hb_re);
-    ie = (ie < 0 ? -ie - 1 : ie);
-    if ((is < 0) || ((ie - is) != span) || ie >= hlc_sz) {
+    if ((ie < 0) || (is < 0) || ((ie - is) != span) || ie >= hlc_sz) {
        return false;
     }
     const uint32_t lb_rs = range_start & 0xFFFF;
diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index c1765a9a4..aab8ae5df 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -108,6 +108,16 @@ DEFINE_TEST(issue436) {
   roaring_free(data);
 }
 
+DEFINE_TEST(issue440) {
+  roaring_bitmap_t *b1 = roaring_bitmap_create();
+  roaring_bitmap_add_range_closed(b1, 0x20000, 0x2FFFF);
+  roaring_bitmap_add_range_closed(b1, 0, 0xFFFF);
+  uint32_t largest_item = 0x11000;
+  assert_false(roaring_bitmap_contains_range(b1, 0, largest_item + 1));
+  assert_false(roaring_bitmap_contains(b1, largest_item));
+  roaring_bitmap_free(b1);
+}
+
 DEFINE_TEST(range_contains) {
     uint32_t end = 2073952257;
     uint32_t start = end-2;
@@ -4405,6 +4415,7 @@ int main() {
     tellmeall();
 
     const struct CMUnitTest tests[] = {
+        cmocka_unit_test(issue440),
         cmocka_unit_test(issue436),
         cmocka_unit_test(issue433),
         cmocka_unit_test(issue429),

From b3c1dd736e944c79c24be1e09e4065a1791924e9 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 16 Feb 2023 12:16:07 -0500
Subject: [PATCH 082/162] Issue a patch release.

---
 CMakeLists.txt                    | 4 ++--
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 74303c3cd..241fec60c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 0)
 set(PROJECT_VERSION_MINOR 9)
-set(PROJECT_VERSION_PATCH 7)
-set(ROARING_LIB_VERSION "0.9.7" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 8)
+set(ROARING_LIB_VERSION "0.9.8" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "7" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/doxygen b/doxygen
index 39a479a9f..b3efb5e93 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "0.9.7"
+PROJECT_NUMBER         = "0.9.8"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 13778bae2..1f337016f 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.9.7"
+#define ROARING_VERSION "0.9.8"
 enum { 
     ROARING_VERSION_MAJOR = 0,
     ROARING_VERSION_MINOR = 9,
-    ROARING_VERSION_REVISION = 7
+    ROARING_VERSION_REVISION = 8
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 11b1ddb3c4e131c1b4d3e42d320cd4206968a0a8 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 27 Feb 2023 16:00:02 -0500
Subject: [PATCH 083/162] Added sec. file

---
 SECURITY.md | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 SECURITY.md

diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 000000000..1d9c45c86
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,9 @@
+# Security Policy
+
+## Reporting a Vulnerability
+
+Please use the following contact information for reporting a vulnerability:
+
+- [Daniel Lemire]( https://www.teluq.ca/siteweb/univ/en/dlemire.html) - daniel@lemire.me
+
+

From 18bcee8a5b1ca65c448b0f80a4bcb52f58712fac Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Tue, 28 Feb 2023 18:29:22 -0500
Subject: [PATCH 084/162] Patch release (better guard for add_range)

---
 CMakeLists.txt                    | 4 ++--
 doxygen                           | 2 +-
 include/roaring/roaring.h         | 4 ++--
 include/roaring/roaring_version.h | 4 ++--
 tests/cpp_unit.cpp                | 8 ++++++++
 5 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 241fec60c..fba717e4b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 0)
 set(PROJECT_VERSION_MINOR 9)
-set(PROJECT_VERSION_PATCH 8)
-set(ROARING_LIB_VERSION "0.9.8" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 9)
+set(ROARING_LIB_VERSION "0.9.9" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "7" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/doxygen b/doxygen
index b3efb5e93..c2d59286f 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "0.9.8"
+PROJECT_NUMBER         = "0.9.9"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h
index 9919e792f..9fad8ec5a 100644
--- a/include/roaring/roaring.h
+++ b/include/roaring/roaring.h
@@ -333,7 +333,7 @@ void roaring_bitmap_add_range_closed(roaring_bitmap_t *r,
  */
 static inline void roaring_bitmap_add_range(roaring_bitmap_t *r,
                                             uint64_t min, uint64_t max) {
-    if(max == min) return;
+    if(max <= min) return;
     roaring_bitmap_add_range_closed(r, (uint32_t)min, (uint32_t)(max - 1));
 }
 
@@ -353,7 +353,7 @@ void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r,
  */
 static inline void roaring_bitmap_remove_range(roaring_bitmap_t *r,
                                                uint64_t min, uint64_t max) {
-    if(max == min) return;
+    if(max <= min) return;
     roaring_bitmap_remove_range_closed(r, (uint32_t)min, (uint32_t)(max - 1));
 }
 
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 1f337016f..25a5632d4 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.9.8"
+#define ROARING_VERSION "0.9.9"
 enum { 
     ROARING_VERSION_MAJOR = 0,
     ROARING_VERSION_MINOR = 9,
-    ROARING_VERSION_REVISION = 8
+    ROARING_VERSION_REVISION = 9
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index b1b50ac58..b5abbe44d 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -52,6 +52,13 @@ bool roaring_iterator_sumall64(uint64_t value, void *param) {
     return true;  // we always process all values
 }
 
+
+DEFINE_TEST(fuzz_001) {
+    roaring::Roaring b;
+    b.addRange(173, 0);
+    assert_true(b.cardinality() == 0);
+}
+
 DEFINE_TEST(serial_test) {
     uint32_t values[] = {5, 2, 3, 4, 1};
     Roaring r1(sizeof(values) / sizeof(uint32_t), values);
@@ -1944,6 +1951,7 @@ DEFINE_TEST(test_cpp_contains_range_interleaved_containers) {
 int main() {
     roaring::misc::tellmeall();
     const struct CMUnitTest tests[] = {
+        cmocka_unit_test(fuzz_001),
         cmocka_unit_test(test_bitmap_of_32),
         cmocka_unit_test(test_bitmap_of_64),
         cmocka_unit_test(serial_test),

From 95de500c8dcbf2ba01c2677fe9097ce7db2f4f70 Mon Sep 17 00:00:00 2001
From: Nathaniel Brough <nathaniel.brough@gmail.com>
Date: Sat, 4 Mar 2023 14:29:23 -0800
Subject: [PATCH 085/162] fuzz: Moves fuzz harnesses from oss-fuzz to CRoaring
 (#442)

This change will make future changes/additions to fuzz harnesses
easier, without needing approvals from the OSS-fuzz team.
---
 fuzz/build.sh                |  38 +++++++++
 fuzz/croaring_fuzzer.c       |  51 ++++++++++++
 fuzz/croaring_fuzzer.options |   2 +
 fuzz/croaring_fuzzer_cc.cc   | 155 +++++++++++++++++++++++++++++++++++
 4 files changed, 246 insertions(+)
 create mode 100755 fuzz/build.sh
 create mode 100644 fuzz/croaring_fuzzer.c
 create mode 100644 fuzz/croaring_fuzzer.options
 create mode 100644 fuzz/croaring_fuzzer_cc.cc

diff --git a/fuzz/build.sh b/fuzz/build.sh
new file mode 100755
index 000000000..21644a6e5
--- /dev/null
+++ b/fuzz/build.sh
@@ -0,0 +1,38 @@
+#!/bin/bash -eu
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+mkdir build-dir && cd build-dir
+cmake -DENABLE_ROARING_TESTS=OFF ..
+make -j$(nproc)
+
+$CC $CFLAGS  \
+     -I$SRC/croaring/include \
+     -c $SRC/croaring_fuzzer.c -o fuzzer.o
+
+$CXX $CXXFLAGS $LIB_FUZZING_ENGINE fuzzer.o   \
+     -o $OUT/croaring_fuzzer $SRC/croaring/build-dir/src/libroaring.a
+
+$CXX $CFLAGS $CXXFLAGS  \
+     -I$SRC/croaring/include \
+     -I$SRC/croaring \
+     -c $SRC/croaring_fuzzer_cc.cc -o fuzzer_cc.o
+
+$CXX $CXXFLAGS $LIB_FUZZING_ENGINE fuzzer_cc.o   \
+     -o $OUT/croaring_fuzzer_cc $SRC/croaring/build-dir/src/libroaring.a
+
+zip $OUT/croaring_fuzzer_seed_corpus.zip $SRC/croaring/tests/testdata/*bin
+cp $SRC/croaring/tests/testdata/*bin $OUT/
diff --git a/fuzz/croaring_fuzzer.c b/fuzz/croaring_fuzzer.c
new file mode 100644
index 000000000..00d2cf44c
--- /dev/null
+++ b/fuzz/croaring_fuzzer.c
@@ -0,0 +1,51 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include "roaring/roaring.h"
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size){
+    roaring_statistics_t stats;
+    bool answer = true;
+    roaring_bitmap_t* bitmap = roaring_bitmap_portable_deserialize_safe(data, size);
+    if(bitmap) {
+        /*
+        uint64_t card1 = roaring_bitmap_get_cardinality(bitmap);
+        roaring_bitmap_statistics(bitmap, &stats);
+        unsigned universe_size = stats.max_value + 1;
+        roaring_bitmap_t *inverted = roaring_bitmap_flip(bitmap, 0U, universe_size);
+        if(inverted) {
+            roaring_bitmap_t *double_inverted = roaring_bitmap_flip(inverted, 0U, universe_size);
+            if(double_inverted)
+            {
+                answer = (roaring_bitmap_get_cardinality(inverted) + roaring_bitmap_get_cardinality(bitmap) == universe_size);
+                if (answer) answer = roaring_bitmap_equals(bitmap, double_inverted);
+                if (!answer) {
+                    printf("Bad flip\n\nbitmap1:\n");
+                    roaring_bitmap_printf_describe(bitmap);  // debug
+                    printf("\n\nflipped:\n");
+                    roaring_bitmap_printf_describe(inverted);  // debug
+                }
+                roaring_bitmap_free(double_inverted);
+            }
+            roaring_bitmap_free(inverted);
+        }*/
+        roaring_bitmap_free(bitmap);
+    }
+    return 0;
+}
diff --git a/fuzz/croaring_fuzzer.options b/fuzz/croaring_fuzzer.options
new file mode 100644
index 000000000..7ca5e76f5
--- /dev/null
+++ b/fuzz/croaring_fuzzer.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+close_fd_mask = 2
diff --git a/fuzz/croaring_fuzzer_cc.cc b/fuzz/croaring_fuzzer_cc.cc
new file mode 100644
index 000000000..e54b0bd3c
--- /dev/null
+++ b/fuzz/croaring_fuzzer_cc.cc
@@ -0,0 +1,155 @@
+// Copyright 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include "cpp/roaring.hh"
+#include "fuzzer/FuzzedDataProvider.h"
+#include <vector>
+
+std::vector<uint32_t> ConsumeVecInRange(FuzzedDataProvider &fdp, size_t length,
+                                        uint32_t min_value,
+                                        uint32_t max_value) {
+  // Returns `length` values, each drawn from fdp within [min_value, max_value].
+  std::vector<uint32_t> result(length);  // sized up front, no placeholder element
+  for (uint32_t &value : result) {
+    value = fdp.ConsumeIntegralInRange<uint32_t>(min_value, max_value);
+  }
+  return result;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  /**
+   * Fuzz entry point: builds two bitmaps from the input bytes and exercises the
+   * C++ Roaring API on them. Returns 0 on completion.
+   *
+   * A bitmap may contain up to 2**32 elements, and this function later writes
+   * its content to an array using 32 bits per element: up to 16 GB. Even without
+   * that array, a bitmap may use over 512 MB in normal operation, since it can
+   * represent any set of integers in [0,2**32). This function may hold several
+   * bitmaps in memory at once, so it can require gigabytes of memory (without
+   * bugs). Hence, unless it has a generous memory capacity, it would almost
+   * certainly run out of memory on an unrestricted range.
+   *
+   * For sanity, we limit the range to 10,000,000, which will use 38 MB or so.
+   * With such a limited range, if we run out of memory, then we can be almost
+   * certain that it has to do with a genuine bug.
+   */
+
+  uint32_t range_start = 0;
+  uint32_t range_end = 10'000'000;
+
+  /**
+   * We are not solely dependent on the range [range_start, range_end) because
+   * ConsumeVecInRange below produces integers in a small range starting at 0.
+   */
+
+  FuzzedDataProvider fdp(data, size);
+  /**
+   * The next line was ConsumeVecInRange(fdp, 500, 0, 1000) but it would pick 500
+   * values at random from 0, 1000, making it almost certain that all of the values
+   * are picked. It seems more useful to pick 500 values in the range 0,1000.
+   */
+  std::vector<uint32_t> bitmap_data_a = ConsumeVecInRange(fdp, 500, 0, 1000);
+  roaring::Roaring a(bitmap_data_a.size(), bitmap_data_a.data());
+  a.runOptimize();
+  a.shrinkToFit();
+
+  std::vector<uint32_t> bitmap_data_b = ConsumeVecInRange(fdp, 500, 0, 1000);
+  roaring::Roaring b(bitmap_data_b.size(), bitmap_data_b.data());
+  b.runOptimize();
+  b.add(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
+  b.addChecked(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
+  b.addRange(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end), fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
+  // add the first half of bitmap_data_a to b.
+  b.addMany(bitmap_data_a.size() / 2, bitmap_data_a.data());
+  b.remove(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
+  b.removeChecked(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
+  b.removeRange(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end),
+                fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
+  b.removeRangeClosed(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end),
+                      fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
+  b.maximum();
+  b.minimum();
+  b.contains(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
+  b.containsRange(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end),
+                  fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
+
+  uint32_t element = 0;  // output slot passed to select() below
+  a.select(fdp.ConsumeIntegralInRange<uint32_t>(0, 1000), &element);
+  a.intersect(b);
+  a.jaccard_index(b);
+  a.or_cardinality(b);
+  a.andnot_cardinality(b);
+  a.xor_cardinality(b);
+  a.rank(fdp.ConsumeIntegralInRange<uint32_t>(0, 5000));
+  a.getSizeInBytes();
+
+  roaring::Roaring c = a & b;
+  roaring::Roaring d = a - b;
+  roaring::Roaring e = a | b;
+  roaring::Roaring f = a ^ b;
+  a |= e;
+  a &= b;
+  a -= c;
+  a ^= f;
+
+  volatile bool is_equal = (a == b);
+
+  std::vector<uint32_t> b_as_array = {0};
+  b_as_array.resize(b.cardinality());  // one slot per element of b
+  b.isEmpty();
+  b.toUint32Array(b_as_array.data());
+
+  a.isSubset(b);
+  a.isStrictSubset(b);
+  b.flip(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end), fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
+  b.flipClosed(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end),
+               fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
+  b.removeRunCompression();
+
+  // Move/copy constructors
+  roaring::Roaring copied = b;
+  roaring::Roaring moved = std::move(b);
+
+  // Assignment operators
+  b = copied;
+  b = std::move(moved);
+
+  // Safe read from serialized
+  std::vector<char> read_buffer = fdp.ConsumeBytes<char>(100);
+  std::vector<char> write_buffer = {0};
+  try {
+    roaring::Roaring read_safely =
+        roaring::Roaring::readSafe(read_buffer.data(), read_buffer.size());
+    write_buffer.resize(read_safely.getSizeInBytes());
+    read_safely.write(write_buffer.data(), fdp.ConsumeBool());
+    assert(write_buffer == read_buffer);
+  } catch (const std::runtime_error &) {
+    // Do nothing.
+  }
+
+  f.toString();
+
+  volatile int unused = 0;
+
+  for (roaring::Roaring::const_iterator i = a.begin(); i != a.end(); i++) {  // walk every element of a
+    unused++;
+  }
+
+  roaring::Roaring::const_iterator b_iter = b.begin();
+  b_iter.equalorlarger(fdp.ConsumeIntegralInRange<uint32_t>(range_start, range_end));
+
+  return 0;
+}

From acad98420f5d8fd8177d9b36a0da408411a01b63 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Mon, 6 Mar 2023 21:53:17 -0500
Subject: [PATCH 086/162] Cleaning.

---
 README.md                  | 13 ++++++++++++-
 cpp/roaring.hh             | 13 ++++++++++++-
 fuzz/croaring_fuzzer.c     | 26 +++-----------------------
 fuzz/croaring_fuzzer_cc.cc | 19 ++++++++++++-------
 include/roaring/roaring.h  |  9 +++++++++
 tests/cpp_example2.cpp     |  6 +++++-
 6 files changed, 53 insertions(+), 33 deletions(-)

diff --git a/README.md b/README.md
index f24f88262..551d86a33 100644
--- a/README.md
+++ b/README.md
@@ -287,6 +287,11 @@ int main() {
     uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
     char *serializedbytes = malloc(expectedsize);
     roaring_bitmap_portable_serialize(r1, serializedbytes);
+    // For additional safety, you may replace roaring_bitmap_portable_deserialize by
+    // roaring_bitmap_portable_deserialize_safe.
+    // Note: it is expected that the input follows the specification
+    // https://github.com/RoaringBitmap/RoaringFormatSpec
+    // otherwise the result may be unusable.
     roaring_bitmap_t *t = roaring_bitmap_portable_deserialize(serializedbytes);
     assert(roaring_bitmap_equals(r1, t));  // what we recover is equal
     roaring_bitmap_free(t);
@@ -298,6 +303,9 @@ int main() {
            expectedsize);  // sizeofbitmap would be zero if no bitmap were found
     // we can also read the bitmap "safely" by specifying a byte size limit:
     t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
+    // It is still necessary for the content of serializedbytes to follow
+    // the standard: https://github.com/RoaringBitmap/RoaringFormatSpec
+    // This is guaranteed when the bytes come from 'roaring_bitmap_portable_serialize'.
     assert(roaring_bitmap_equals(r1, t));  // what we recover is equal
     roaring_bitmap_free(t);
 
@@ -422,7 +430,10 @@ int main() {
     uint32_t expectedsize = r1.getSizeInBytes();
     char *serializedbytes = new char[expectedsize];
     r1.write(serializedbytes);
-    Roaring t = Roaring::read(serializedbytes);
+    // readSafe will not overflow, but the resulting bitmap
+    // is only valid and usable if the input follows the
+    // Roaring specification: https://github.com/RoaringBitmap/RoaringFormatSpec/
+    Roaring t = Roaring::readSafe(serializedbytes);
     assert(r1 == t);
     delete[] serializedbytes;
 
diff --git a/cpp/roaring.hh b/cpp/roaring.hh
index d48ebdab8..2e049955c 100644
--- a/cpp/roaring.hh
+++ b/cpp/roaring.hh
@@ -567,7 +567,18 @@ public:
     /**
      * Read a bitmap from a serialized version, reading no more than maxbytes
      * bytes.  This is meant to be compatible with the Java and Go versions.
-     *
+     * The function itself is safe in the sense that it will not cause buffer overflows.
+     * However, for correct operations, it is assumed that the bitmap read was once
+     * serialized from a valid bitmap. If you provided an incorrect input (garbage), then the
+     * bitmap read may not be in a valid state and following operations may not lead
+     * to sensible results. It is your responsibility to ensure that the input bytes
+     * follow the format specification if you want a usable bitmap:
+     * https://github.com/RoaringBitmap/RoaringFormatSpec
+     * In particular, the serialized array containers need to be in sorted order, and the
+     * run containers should be in sorted non-overlapping order. This is guaranteed to
+     * happen when serializing an existing bitmap, but not for random inputs.
+     * Note that this function assumes that your bitmap was serialized in *portable* mode
+     * (which is the default with the 'write' method).
      */
     static Roaring readSafe(const char *buf, size_t maxbytes) {
         roaring_bitmap_t * r =
diff --git a/fuzz/croaring_fuzzer.c b/fuzz/croaring_fuzzer.c
index 00d2cf44c..c97f41187 100644
--- a/fuzz/croaring_fuzzer.c
+++ b/fuzz/croaring_fuzzer.c
@@ -20,31 +20,11 @@
 #include "roaring/roaring.h"
 
 int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size){
-    roaring_statistics_t stats;
-    bool answer = true;
+int LLVMFuzzerTestOneInput(const char *data, size_t size) {
+    // We test that deserialization never fails.
     roaring_bitmap_t* bitmap = roaring_bitmap_portable_deserialize_safe(data, size);
     if(bitmap) {
-        /*
-        uint64_t card1 = roaring_bitmap_get_cardinality(bitmap);
-        roaring_bitmap_statistics(bitmap, &stats);
-        unsigned universe_size = stats.max_value + 1;
-        roaring_bitmap_t *inverted = roaring_bitmap_flip(bitmap, 0U, universe_size);
-        if(inverted) {
-            roaring_bitmap_t *double_inverted = roaring_bitmap_flip(inverted, 0U, universe_size);
-            if(double_inverted)
-            {
-                answer = (roaring_bitmap_get_cardinality(inverted) + roaring_bitmap_get_cardinality(bitmap) == universe_size);
-                if (answer) answer = roaring_bitmap_equals(bitmap, double_inverted);
-                if (!answer) {
-                    printf("Bad flip\n\nbitmap1:\n");
-                    roaring_bitmap_printf_describe(bitmap);  // debug
-                    printf("\n\nflipped:\n");
-                    roaring_bitmap_printf_describe(inverted);  // debug
-                }
-                roaring_bitmap_free(double_inverted);
-            }
-            roaring_bitmap_free(inverted);
-        }*/
+        // The bitmap may not be usable if it does not follow the specification.
         roaring_bitmap_free(bitmap);
     }
     return 0;
diff --git a/fuzz/croaring_fuzzer_cc.cc b/fuzz/croaring_fuzzer_cc.cc
index e54b0bd3c..1c1e39e53 100644
--- a/fuzz/croaring_fuzzer_cc.cc
+++ b/fuzz/croaring_fuzzer_cc.cc
@@ -129,17 +129,22 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
 
   // Safe read from serialized
   std::vector<char> read_buffer = fdp.ConsumeBytes<char>(100);
-  std::vector<char> write_buffer = {0};
-  try {
+  {
     roaring::Roaring read_safely =
         roaring::Roaring::readSafe(read_buffer.data(), read_buffer.size());
-    write_buffer.resize(read_safely.getSizeInBytes());
-    read_safely.write(write_buffer.data(), fdp.ConsumeBool());
-    assert(write_buffer == read_buffer);
-  } catch (const std::runtime_error &) {
-    // Do nothing.
+    // The above is guaranteed to be safe. However, read_safely is maybe
+    // in an improper state and it cannot be used safely (including for
+    // reserialization).
   }
 
+  // The bitmap b can be serialized and re-read.
+  std::size_t expected_size_in_bytes = b.getSizeInBytes();
+  std::vector<char> buffer(expected_size_in_bytes);
+  std::size_t size_in_bytes = b.write(buffer.data());
+  assert(expected_size_in_bytes == size_in_bytes);
+  roaring::Roaring bread = roaring::Roaring::readSafe(buffer.data(), size_in_bytes);
+  assert(bread == b);
+
   f.toString();
 
   volatile int unused = 0;
diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h
index 9fad8ec5a..04291c4c0 100644
--- a/include/roaring/roaring.h
+++ b/include/roaring/roaring.h
@@ -525,6 +525,15 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
  * This is meant to be compatible with the Java and Go versions:
  * https://github.com/RoaringBitmap/RoaringFormatSpec
  *
+ * The function itself is safe in the sense that it will not cause buffer overflows.
+ * However, for correct operations, it is assumed that the bitmap read was once
+ * serialized from a valid bitmap (i.e., it follows the format specification).
+ * If you provided an incorrect input (garbage), then the bitmap read may not be in
+ * a valid state and following operations may not lead to sensible results.
+ * In particular, the serialized array containers need to be in sorted order, and the
+ * run containers should be in sorted non-overlapping order. This is guaranteed to
+ * happen when serializing an existing bitmap, but not for random inputs.
+ *
  * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
  * the data format is going to be big-endian and not compatible with little-endian systems.
  */
diff --git a/tests/cpp_example2.cpp b/tests/cpp_example2.cpp
index 8e2b3cff6..9c6ef3927 100644
--- a/tests/cpp_example2.cpp
+++ b/tests/cpp_example2.cpp
@@ -73,7 +73,10 @@ int main() {
     uint32_t expectedsize = r1.getSizeInBytes();
     char *serializedbytes = new char[expectedsize];
     r1.write(serializedbytes);
-    Roaring t = Roaring::read(serializedbytes);
+    // readSafe will not overflow, but the resulting bitmap
+    // is only valid and usable if the input follows the
+    // Roaring specification: https://github.com/RoaringBitmap/RoaringFormatSpec/
+    Roaring t = Roaring::readSafe(serializedbytes);
     assert_true(r1 == t);
     delete[] serializedbytes;
 
@@ -98,5 +101,6 @@ int main() {
     Roaring rogue(5, manyvalues);
     Roaring::const_iterator j = rogue.begin();
     j.equalorlarger(4);  // *j == 4
+
     return EXIT_SUCCESS;
 }

From d51cedb8f85482afb97da233afebdecd4cdf3918 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 7 Mar 2023 13:52:44 -0500
Subject: [PATCH 087/162] Fixed cppexample2 typo (#443)

---
 README.md              | 2 +-
 tests/cpp_example2.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 551d86a33..b214a19d6 100644
--- a/README.md
+++ b/README.md
@@ -433,7 +433,7 @@ int main() {
     // readSafe will not overflow, but the resulting bitmap
     // is only valid and usable if the input follows the
     // Roaring specification: https://github.com/RoaringBitmap/RoaringFormatSpec/
-    Roaring t = Roaring::readSafe(serializedbytes);
+    Roaring t = Roaring::readSafe(serializedbytes, expectedsize);
     assert(r1 == t);
     delete[] serializedbytes;
 
diff --git a/tests/cpp_example2.cpp b/tests/cpp_example2.cpp
index 9c6ef3927..cab19249a 100644
--- a/tests/cpp_example2.cpp
+++ b/tests/cpp_example2.cpp
@@ -76,7 +76,7 @@ int main() {
     // readSafe will not overflow, but the resulting bitmap
     // is only valid and usable if the input follows the
     // Roaring specification: https://github.com/RoaringBitmap/RoaringFormatSpec/
-    Roaring t = Roaring::readSafe(serializedbytes);
+    Roaring t = Roaring::readSafe(serializedbytes, expectedsize);
     assert_true(r1 == t);
     delete[] serializedbytes;
 

From 2c6708ec84cb8d574c58463c0937e0a922e31f26 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 9 Mar 2023 22:56:38 -0500
Subject: [PATCH 088/162] Typo

---
 fuzz/croaring_fuzzer.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fuzz/croaring_fuzzer.c b/fuzz/croaring_fuzzer.c
index c97f41187..27f7aa3b5 100644
--- a/fuzz/croaring_fuzzer.c
+++ b/fuzz/croaring_fuzzer.c
@@ -19,7 +19,6 @@
 #include <stdlib.h>
 #include "roaring/roaring.h"
 
-int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size){
 int LLVMFuzzerTestOneInput(const char *data, size_t size) {
     // We test that deserialization never fails.
     roaring_bitmap_t* bitmap = roaring_bitmap_portable_deserialize_safe(data, size);

From 84fe3c8b0b58111d9daff8256b8399b5d5957968 Mon Sep 17 00:00:00 2001
From: Chen Tianjie <TJ_Chen@outlook.com>
Date: Tue, 14 Mar 2023 21:35:57 +0800
Subject: [PATCH 089/162] Add intersect_vector16_inplace. (#445)

---
 include/roaring/array_util.h |  3 ++
 src/array_util.c             | 92 ++++++++++++++++++++++++++++++++++++
 src/containers/array.c       | 13 ++++-
 3 files changed, 106 insertions(+), 2 deletions(-)

diff --git a/include/roaring/array_util.h b/include/roaring/array_util.h
index e0be2f70e..46e57b715 100644
--- a/include/roaring/array_util.h
+++ b/include/roaring/array_util.h
@@ -125,6 +125,9 @@ int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
                            const uint16_t *__restrict__ B, size_t s_b,
                            uint16_t *C);
 
+int32_t intersect_vector16_inplace(uint16_t *__restrict__ A, size_t s_a,
+                           const uint16_t *__restrict__ B, size_t s_b);
+
 /**
  * Compute the cardinality of the intersection using SSE4 instructions
  */
diff --git a/src/array_util.c b/src/array_util.c
index 1daed41b6..963c32490 100644
--- a/src/array_util.c
+++ b/src/array_util.c
@@ -444,6 +444,98 @@ int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
     }
     return (int32_t)count;
 }
+
+int32_t intersect_vector16_inplace(uint16_t *__restrict__ A, size_t s_a,
+                           const uint16_t *__restrict__ B, size_t s_b) {
+    size_t count = 0;  // number of common values already written to the front of A
+    size_t i_a = 0, i_b = 0;  // read positions in A and B
+    const int vectorlength = sizeof(__m128i) / sizeof(uint16_t);  // uint16_t lanes per 128-bit vector
+    const size_t st_a = (s_a / vectorlength) * vectorlength;  // s_a rounded down to whole vectors
+    const size_t st_b = (s_b / vectorlength) * vectorlength;  // s_b rounded down to whole vectors
+    __m128i v_a, v_b;
+    if ((i_a < st_a) && (i_b < st_b)) {
+        v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
+        v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
+        __m128i tmp[2] = {_mm_setzero_si128()};  // staging buffer for matches not yet stored in A
+        size_t tmp_count = 0;  // number of uint16_t values currently staged in tmp
+        while ((A[i_a] == 0) || (B[i_b] == 0)) {  // explicit-length compare while a block starts with 0
+            const __m128i res_v = _mm_cmpestrm(
+                v_b, vectorlength, v_a, vectorlength,
+                _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
+            const int r = _mm_extract_epi32(res_v, 0);  // bit i set when lane i of v_a occurs in v_b
+            __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);  // table defined elsewhere
+            __m128i p = _mm_shuffle_epi8(v_a, sm16);  // presumably packs the matching lanes first - confirm
+            _mm_storeu_si128((__m128i*)&((uint16_t*)tmp)[tmp_count], p);
+            tmp_count += _mm_popcnt_u32(r);  // one staged value per set bit of r
+            const uint16_t a_max = A[i_a + vectorlength - 1];
+            const uint16_t b_max = B[i_b + vectorlength - 1];
+            if (a_max <= b_max) {  // flush one vector of staged values into A, then advance A
+                _mm_storeu_si128((__m128i *)&A[count], tmp[0]);
+                _mm_storeu_si128(tmp, _mm_setzero_si128());
+                count += tmp_count;
+                tmp_count = 0;           
+                i_a += vectorlength;
+                if (i_a == st_a) break;  // no whole vector left in A
+                v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
+            }
+            if (b_max <= a_max) {  // advance B by one vector
+                i_b += vectorlength;
+                if (i_b == st_b) break;  // no whole vector left in B
+                v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
+            }
+        }
+        if ((i_a < st_a) && (i_b < st_b)) {
+            while (true) {  // same steps as above, using the implicit-length compare
+                const __m128i res_v = _mm_cmpistrm(
+                    v_b, v_a,
+                    _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
+                const int r = _mm_extract_epi32(res_v, 0);
+                __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);
+                __m128i p = _mm_shuffle_epi8(v_a, sm16);
+                _mm_storeu_si128((__m128i*)&((uint16_t*)tmp)[tmp_count], p);
+                tmp_count += _mm_popcnt_u32(r);
+                const uint16_t a_max = A[i_a + vectorlength - 1];
+                const uint16_t b_max = B[i_b + vectorlength - 1];
+                if (a_max <= b_max) {
+                    _mm_storeu_si128((__m128i *)&A[count], tmp[0]);
+                    _mm_storeu_si128(tmp, _mm_setzero_si128());
+                    count += tmp_count;
+                    tmp_count = 0;  
+                    i_a += vectorlength;
+                    if (i_a == st_a) break;
+                    v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
+                }
+                if (b_max <= a_max) {
+                    i_b += vectorlength;
+                    if (i_b == st_b) break;
+                    v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
+                }
+            }
+        }
+        // copy the values still staged in tmp; tmp_count <= 8, so this loop is cheap
+        for (size_t i = 0; i < tmp_count; i++) {
+            A[count] = ((uint16_t*)tmp)[i];
+            count++;
+        }
+        i_a += tmp_count;  // We can at least jump past tmp_count elements in A
+    }
+    // intersect the remaining tail elements with a scalar merge
+    while (i_a < s_a && i_b < s_b) {
+        uint16_t a = A[i_a];
+        uint16_t b = B[i_b];
+        if (a < b) {
+            i_a++;
+        } else if (b < a) {
+            i_b++;
+        } else {
+            A[count] = a;  //==b;
+            count++;
+            i_a++;
+            i_b++;
+        }
+    }
+    return (int32_t)count;  // size of the intersection now stored at the front of A
+}
 CROARING_UNTARGET_REGION
 
 CROARING_TARGET_AVX2
diff --git a/src/containers/array.c b/src/containers/array.c
index 312f7c6a1..cb2957265 100644
--- a/src/containers/array.c
+++ b/src/containers/array.c
@@ -361,7 +361,6 @@ bool array_container_intersect(const array_container_t *array1,
  * */
 void array_container_intersection_inplace(array_container_t *src_1,
                                           const array_container_t *src_2) {
-    // todo: can any of this be vectorized?
     int32_t card_1 = src_1->cardinality, card_2 = src_2->cardinality;
     const int threshold = 64;  // subject to tuning
     if (card_1 * threshold < card_2) {
@@ -371,8 +370,18 @@ void array_container_intersection_inplace(array_container_t *src_1,
         src_1->cardinality = intersect_skewed_uint16(
             src_2->array, card_2, src_1->array, card_1, src_1->array);
     } else {
+#ifdef CROARING_IS_X64
+        if (croaring_avx2()) {
+            src_1->cardinality = intersect_vector16_inplace(
+                src_1->array, card_1, src_2->array, card_2);
+        } else {
+            src_1->cardinality = intersect_uint16(
+                src_1->array, card_1, src_2->array, card_2, src_1->array);
+        }
+#else
         src_1->cardinality = intersect_uint16(
-            src_1->array, card_1, src_2->array, card_2, src_1->array);
+                        src_1->array, card_1, src_2->array, card_2, src_1->array);
+#endif       
     }
 }
 

From 2411b3be4ddf5cd982c8acadf56be109ba9d7b2c Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 14 Mar 2023 21:09:00 -0400
Subject: [PATCH 090/162] This PR improves the C++ documentation and fix an
 error with the fuzzer (#449)

---
 cpp/roaring.hh             | 131 ++++++++++++++++++++++---------------
 fuzz/croaring_fuzzer_cc.cc |   4 +-
 2 files changed, 82 insertions(+), 53 deletions(-)

diff --git a/cpp/roaring.hh b/cpp/roaring.hh
index 2e049955c..cf453ec71 100644
--- a/cpp/roaring.hh
+++ b/cpp/roaring.hh
@@ -70,7 +70,8 @@ public:
     }
 
     /**
-     * Copy constructor
+     * Copy constructor.
+     * It may throw std::runtime_error if there is insufficient memory.
      */
     Roaring(const Roaring &r) : Roaring() {
         if (!api::roaring_bitmap_overwrite(&roaring, &r.roaring)) {
@@ -133,87 +134,87 @@ public:
     /**
      * Add value x
      */
-    void add(uint32_t x) { api::roaring_bitmap_add(&roaring, x); }
+    void add(uint32_t x) noexcept { api::roaring_bitmap_add(&roaring, x); }
 
     /**
      * Add value x
      * Returns true if a new value was added, false if the value was already
      * existing.
      */
-    bool addChecked(uint32_t x) {
+    bool addChecked(uint32_t x) noexcept {
         return api::roaring_bitmap_add_checked(&roaring, x);
     }
 
     /**
      * Add all values in range [min, max)
      */
-    void addRange(const uint64_t min, const uint64_t max)  {
+    void addRange(const uint64_t min, const uint64_t max) noexcept {
         return api::roaring_bitmap_add_range(&roaring, min, max);
     }
 
     /**
      * Add all values in range [min, max]
      */
-    void addRangeClosed(const uint32_t min, const uint32_t max)  {
+    void addRangeClosed(const uint32_t min, const uint32_t max) noexcept {
         return api::roaring_bitmap_add_range_closed(&roaring, min, max);
     }
 
     /**
      * Add value n_args from pointer vals
      */
-    void addMany(size_t n_args, const uint32_t *vals) {
+    void addMany(size_t n_args, const uint32_t *vals) noexcept {
         api::roaring_bitmap_add_many(&roaring, n_args, vals);
     }
 
     /**
      * Remove value x
      */
-    void remove(uint32_t x) { api::roaring_bitmap_remove(&roaring, x); }
+    void remove(uint32_t x) noexcept { api::roaring_bitmap_remove(&roaring, x); }
 
     /**
      * Remove value x
      * Returns true if a new value was removed, false if the value was not
      * existing.
      */
-    bool removeChecked(uint32_t x) {
+    bool removeChecked(uint32_t x) noexcept {
         return api::roaring_bitmap_remove_checked(&roaring, x);
     }
 
     /**
      * Remove all values in range [min, max)
      */
-    void removeRange(uint64_t min, uint64_t max) {
+    void removeRange(uint64_t min, uint64_t max) noexcept {
         return api::roaring_bitmap_remove_range(&roaring, min, max);
     }
 
     /**
      * Remove all values in range [min, max]
      */
-    void removeRangeClosed(uint32_t min, uint32_t max) {
+    void removeRangeClosed(uint32_t min, uint32_t max) noexcept {
         return api::roaring_bitmap_remove_range_closed(&roaring, min, max);
     }
 
     /**
      * Return the largest value (if not empty)
      */
-    uint32_t maximum() const { return api::roaring_bitmap_maximum(&roaring); }
+    uint32_t maximum() const noexcept { return api::roaring_bitmap_maximum(&roaring); }
 
     /**
      * Return the smallest value (if not empty)
      */
-    uint32_t minimum() const { return api::roaring_bitmap_minimum(&roaring); }
+    uint32_t minimum() const noexcept { return api::roaring_bitmap_minimum(&roaring); }
 
     /**
      * Check if value x is present
      */
-    bool contains(uint32_t x) const {
+    bool contains(uint32_t x) const noexcept {
         return api::roaring_bitmap_contains(&roaring, x);
     }
 
     /**
      * Check if all values from x (included) to y (excluded) are present
      */
-    bool containsRange(const uint64_t x, const uint64_t y) const {
+    bool containsRange(const uint64_t x, const uint64_t y) const noexcept {
         return api::roaring_bitmap_contains_range(&roaring, x, y);
     }
 
@@ -241,6 +242,7 @@ public:
     /**
      * Copies the content of the provided bitmap, and
      * discard the current content.
+     * It may throw std::runtime_error if there is insufficient memory.
      */
     Roaring &operator=(const Roaring &r) {
         if (!api::roaring_bitmap_overwrite(&roaring, &r.roaring)) {
@@ -284,7 +286,7 @@ public:
      * Performance hint: if you are computing the intersection between several
      * bitmaps, two-by-two, it is best to start with the smallest bitmap.
      */
-    Roaring &operator&=(const Roaring &r) {
+    Roaring &operator&=(const Roaring &r) noexcept {
         api::roaring_bitmap_and_inplace(&roaring, &r.roaring);
         return *this;
     }
@@ -294,7 +296,7 @@ public:
      * bitmap, writing the result in the current bitmap. The provided bitmap
      * is not modified.
      */
-    Roaring &operator-=(const Roaring &r) {
+    Roaring &operator-=(const Roaring &r) noexcept {
         api::roaring_bitmap_andnot_inplace(&roaring, &r.roaring);
         return *this;
     }
@@ -306,7 +308,7 @@ public:
      *
      * See also the fastunion function to aggregate many bitmaps more quickly.
      */
-    Roaring &operator|=(const Roaring &r) {
+    Roaring &operator|=(const Roaring &r) noexcept {
         api::roaring_bitmap_or_inplace(&roaring, &r.roaring);
         return *this;
     }
@@ -316,7 +318,7 @@ public:
      * bitmap, writing the result in the current bitmap. The provided bitmap
      * is not modified.
      */
-    Roaring &operator^=(const Roaring &r) {
+    Roaring &operator^=(const Roaring &r) noexcept {
         api::roaring_bitmap_xor_inplace(&roaring, &r.roaring);
         return *this;
     }
@@ -324,31 +326,31 @@ public:
     /**
      * Exchange the content of this bitmap with another.
      */
-    void swap(Roaring &r) { std::swap(r.roaring, roaring); }
+    void swap(Roaring &r) noexcept { std::swap(r.roaring, roaring); }
 
     /**
      * Get the cardinality of the bitmap (number of elements).
      */
-    uint64_t cardinality() const {
+    uint64_t cardinality() const noexcept {
         return api::roaring_bitmap_get_cardinality(&roaring);
     }
 
     /**
      * Returns true if the bitmap is empty (cardinality is zero).
      */
-    bool isEmpty() const { return api::roaring_bitmap_is_empty(&roaring); }
+    bool isEmpty() const noexcept { return api::roaring_bitmap_is_empty(&roaring); }
 
     /**
      * Returns true if the bitmap is subset of the other.
      */
-    bool isSubset(const Roaring &r) const {
+    bool isSubset(const Roaring &r) const noexcept {
         return api::roaring_bitmap_is_subset(&roaring, &r.roaring);
     }
 
     /**
      * Returns true if the bitmap is strict subset of the other.
      */
-    bool isStrictSubset(const Roaring &r) const {
+    bool isStrictSubset(const Roaring &r) const noexcept {
         return api::roaring_bitmap_is_strict_subset(&roaring, &r.roaring);
     }
 
@@ -357,20 +359,20 @@ public:
      * responsible to ensure that there is enough memory allocated
      * (e.g., ans = new uint32[mybitmap.cardinality()];)
      */
-    void toUint32Array(uint32_t *ans) const {
+    void toUint32Array(uint32_t *ans) const noexcept {
         api::roaring_bitmap_to_uint32_array(&roaring, ans);
     }
     /**
      * To int array with pagination
      */
-    void rangeUint32Array(uint32_t *ans, size_t offset, size_t limit) const {
+    void rangeUint32Array(uint32_t *ans, size_t offset, size_t limit) const noexcept {
         api::roaring_bitmap_range_uint32_array(&roaring, offset, limit, ans);
     }
 
     /**
      * Return true if the two bitmaps contain the same elements.
      */
-    bool operator==(const Roaring &r) const {
+    bool operator==(const Roaring &r) const noexcept {
         return api::roaring_bitmap_equals(&roaring, &r.roaring);
     }
 
@@ -378,7 +380,7 @@ public:
      * Compute the negation of the roaring bitmap within the half-open interval
      * [range_start, range_end). Areas outside the interval are unchanged.
      */
-    void flip(uint64_t range_start, uint64_t range_end) {
+    void flip(uint64_t range_start, uint64_t range_end) noexcept {
         api::roaring_bitmap_flip_inplace(&roaring, range_start, range_end);
     }
 
@@ -386,7 +388,7 @@ public:
      * Compute the negation of the roaring bitmap within the closed interval
      * [range_start, range_end]. Areas outside the interval are unchanged.
      */
-    void flipClosed(uint32_t range_start, uint32_t range_end) {
+    void flipClosed(uint32_t range_start, uint32_t range_end) noexcept {
         api::roaring_bitmap_flip_inplace(
             &roaring, range_start, uint64_t(range_end) + 1);
     }
@@ -395,7 +397,7 @@ public:
      * Remove run-length encoding even when it is more space efficient.
      * Return whether a change was applied.
      */
-    bool removeRunCompression() {
+    bool removeRunCompression() noexcept {
         return api::roaring_bitmap_remove_run_compression(&roaring);
     }
 
@@ -405,13 +407,13 @@ public:
      * Returns true if the result has at least one run container.  Additional
      * savings might be possible by calling shrinkToFit().
      */
-    bool runOptimize() { return api::roaring_bitmap_run_optimize(&roaring); }
+    bool runOptimize() noexcept { return api::roaring_bitmap_run_optimize(&roaring); }
 
     /**
      * If needed, reallocate memory to shrink the memory usage. Returns
      * the number of bytes saved.
      */
-    size_t shrinkToFit() { return api::roaring_bitmap_shrink_to_fit(&roaring); }
+    size_t shrinkToFit() noexcept { return api::roaring_bitmap_shrink_to_fit(&roaring); }
 
     /**
      * Iterate over the bitmap elements. The function iterator is called once
@@ -434,21 +436,21 @@ public:
      * this function returns true and sets element to the element of given rank.
      * Otherwise, it returns false.
      */
-    bool select(uint32_t rnk, uint32_t *element) const {
+    bool select(uint32_t rnk, uint32_t *element) const noexcept {
         return api::roaring_bitmap_select(&roaring, rnk, element);
     }
 
     /**
      * Computes the size of the intersection between two bitmaps.
      */
-    uint64_t and_cardinality(const Roaring &r) const {
+    uint64_t and_cardinality(const Roaring &r) const noexcept {
         return api::roaring_bitmap_and_cardinality(&roaring, &r.roaring);
     }
 
     /**
      * Check whether the two bitmaps intersect.
      */
-    bool intersect(const Roaring &r) const {
+    bool intersect(const Roaring &r) const noexcept {
         return api::roaring_bitmap_intersect(&roaring, &r.roaring);
     }
 
@@ -459,21 +461,21 @@ public:
      *
      * The Jaccard index is undefined if both bitmaps are empty.
      */
-    double jaccard_index(const Roaring &r) const {
+    double jaccard_index(const Roaring &r) const noexcept {
         return api::roaring_bitmap_jaccard_index(&roaring, &r.roaring);
     }
 
     /**
      * Computes the size of the union between two bitmaps.
      */
-    uint64_t or_cardinality(const Roaring &r) const {
+    uint64_t or_cardinality(const Roaring &r) const noexcept {
         return api::roaring_bitmap_or_cardinality(&roaring, &r.roaring);
     }
 
     /**
      * Computes the size of the difference (andnot) between two bitmaps.
      */
-    uint64_t andnot_cardinality(const Roaring &r) const {
+    uint64_t andnot_cardinality(const Roaring &r) const noexcept {
         return api::roaring_bitmap_andnot_cardinality(&roaring, &r.roaring);
     }
 
@@ -481,7 +483,7 @@ public:
      * Computes the size of the symmetric difference (andnot) between two
      * bitmaps.
      */
-    uint64_t xor_cardinality(const Roaring &r) const {
+    uint64_t xor_cardinality(const Roaring &r) const noexcept {
         return api::roaring_bitmap_xor_cardinality(&roaring, &r.roaring);
     }
 
@@ -493,7 +495,7 @@ public:
      * 1 when ranking the smallest value, but the select function returns the
      * smallest value when using index 0.
      */
-    uint64_t rank(uint32_t x) const {
+    uint64_t rank(uint32_t x) const noexcept {
         return api::roaring_bitmap_rank(&roaring, x);
     }
 
@@ -536,11 +538,12 @@ public:
      *      }  // namespace serialization
      *      }  // namespace boost
      */
-    size_t write(char *buf, bool portable = true) const {
-        if (portable)
+    size_t write(char *buf, bool portable = true) const noexcept {
+        if (portable) {
             return api::roaring_bitmap_portable_serialize(&roaring, buf);
-        else
+        } else {
             return api::roaring_bitmap_serialize(&roaring, buf);
+        }
     }
 
     /**
@@ -553,6 +556,11 @@ public:
      *
      * This function is unsafe in the sense that if you provide bad data,
      * many, many bytes could be read. See also readSafe.
+     *
+     * The function may throw std::runtime_error if a bitmap could not be read. Note that even
+     * if it does not throw, the bitmap could still be unusable if the loaded
+     * data does not match the portable Roaring specification: you should
+     * ensure that the data you load come from a serialized bitmap.
      */
     static Roaring read(const char *buf, bool portable = true) {
         roaring_bitmap_t * r = portable
@@ -579,6 +587,11 @@ public:
      * happen when serializing an existing bitmap, but not for random inputs.
      * Note that this function assumes that your bitmap was serialized in *portable* mode
      * (which is the default with the 'write' method).
+     *
+     * The function may throw std::runtime_error if a bitmap could not be read. Note that even
+     * if it does not throw, the bitmap could still be unusable if the loaded
+     * data does not match the portable Roaring specification: you should
+     * ensure that the data you load come from a serialized bitmap.
      */
     static Roaring readSafe(const char *buf, size_t maxbytes) {
         roaring_bitmap_t * r =
@@ -597,13 +610,18 @@ public:
      * can save space compared to the portable format (e.g., for very
      * sparse bitmaps).
      */
-    size_t getSizeInBytes(bool portable = true) const {
-        if (portable)
+    size_t getSizeInBytes(bool portable = true) const noexcept {
+        if (portable) {
             return api::roaring_bitmap_portable_size_in_bytes(&roaring);
-        else
+        } else {
             return api::roaring_bitmap_size_in_bytes(&roaring);
+        }
     }
 
+    /**
+     * For advanced users.
+     * This function may throw std::runtime_error.
+     */
     static const Roaring frozenView(const char *buf, size_t length) {
         const roaring_bitmap_t *s =
             api::roaring_bitmap_frozen_view(buf, length);
@@ -615,11 +633,17 @@ public:
         return r;
     }
 
-    void writeFrozen(char *buf) const {
+    /**
+     * For advanced users.
+     */
+    void writeFrozen(char *buf) const noexcept {
         roaring_bitmap_frozen_serialize(&roaring, buf);
     }
 
-    size_t getFrozenSizeInBytes() const {
+    /**
+     * For advanced users.
+     */
+    size_t getFrozenSizeInBytes() const noexcept {
         return roaring_bitmap_frozen_size_in_bytes(&roaring);
     }
 
@@ -631,6 +655,7 @@ public:
      * bitmaps, two-by-two, it is best to start with the smallest bitmap.
      * Consider also using the operator &= to avoid needlessly creating
      * many temporary bitmaps.
+     * This function may throw std::runtime_error.
      */
     Roaring operator&(const Roaring &o) const {
         roaring_bitmap_t *r = api::roaring_bitmap_and(&roaring, &o.roaring);
@@ -643,6 +668,7 @@ public:
     /**
      * Computes the difference between two bitmaps and returns new bitmap.
      * The current bitmap and the provided bitmap are unchanged.
+     * This function may throw std::runtime_error.
      */
     Roaring operator-(const Roaring &o) const {
         roaring_bitmap_t *r = api::roaring_bitmap_andnot(&roaring, &o.roaring);
@@ -655,6 +681,7 @@ public:
     /**
      * Computes the union between two bitmaps and returns new bitmap.
      * The current bitmap and the provided bitmap are unchanged.
+     * This function may throw std::runtime_error.
      */
     Roaring operator|(const Roaring &o) const {
         roaring_bitmap_t *r = api::roaring_bitmap_or(&roaring, &o.roaring);
@@ -667,6 +694,7 @@ public:
     /**
      * Computes the symmetric union between two bitmaps and returns new bitmap.
      * The current bitmap and the provided bitmap are unchanged.
+     * This function may throw std::runtime_error.
      */
     Roaring operator^(const Roaring &o) const {
         roaring_bitmap_t *r = api::roaring_bitmap_xor(&roaring, &o.roaring);
@@ -679,19 +707,19 @@ public:
     /**
      * Whether or not we apply copy and write.
      */
-    void setCopyOnWrite(bool val) {
+    void setCopyOnWrite(bool val) noexcept {
         api::roaring_bitmap_set_copy_on_write(&roaring, val);
     }
 
     /**
      * Print the content of the bitmap
      */
-    void printf() const { api::roaring_bitmap_printf(&roaring); }
+    void printf() const noexcept { api::roaring_bitmap_printf(&roaring); }
 
     /**
      * Print the content of the bitmap into a string
      */
-    std::string toString() const {
+    std::string toString() const noexcept {
         struct iter_data {
             std::string str{}; // The empty constructor silences warnings from pedantic static analyzers.
             char first_char = '{';
@@ -716,13 +744,14 @@ public:
     /**
      * Whether or not copy and write is active.
      */
-    bool getCopyOnWrite() const {
+    bool getCopyOnWrite() const noexcept {
         return api::roaring_bitmap_get_copy_on_write(&roaring);
     }
 
     /**
      * Computes the logical or (union) between "n" bitmaps (referenced by a
      * pointer).
+     * This function may throw std::runtime_error.
      */
     static Roaring fastunion(size_t n, const Roaring **inputs) {
         const roaring_bitmap_t **x =
diff --git a/fuzz/croaring_fuzzer_cc.cc b/fuzz/croaring_fuzzer_cc.cc
index 1c1e39e53..fe2efda1c 100644
--- a/fuzz/croaring_fuzzer_cc.cc
+++ b/fuzz/croaring_fuzzer_cc.cc
@@ -129,13 +129,13 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
 
   // Safe read from serialized
   std::vector<char> read_buffer = fdp.ConsumeBytes<char>(100);
-  {
+  try {
     roaring::Roaring read_safely =
         roaring::Roaring::readSafe(read_buffer.data(), read_buffer.size());
     // The above is guaranteed to be safe. However, read_safely is maybe
     // in an improper state and it cannot be used safely (including for
     // reserialization).
-  }
+  } catch(...) {}
 
   // The bitmap b can be serialized and re-read.
   std::size_t expected_size_in_bytes = b.getSizeInBytes();

From bae09d97031121700efa3bba2d8784816870424f Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Wed, 15 Mar 2023 19:02:47 -0400
Subject: [PATCH 091/162] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b214a19d6..a5df08728 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ Bitsets, also called bitmaps, are commonly used as fast data structures. Unfortu
 
 Roaring bitmaps are compressed bitmaps which tend to outperform conventional compressed bitmaps such as WAH, EWAH or Concise.
 They are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
-[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas],  [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [InfluxDB](https://www.influxdata.com), [Pilosa][pilosa], [Bleve](http://www.blevesearch.com), [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The CRoaring library is used in several systems such as [Apache Doris](http://doris.incubator.apache.org). The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing.
+[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas],  [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [InfluxDB](https://www.influxdata.com), [Pilosa][pilosa], [Bleve](http://www.blevesearch.com), [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The CRoaring library is used in several systems such as [Apache Doris](http://doris.incubator.apache.org) and [Twitter Pelican](https://github.com/twitter/pelikan). The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing.
 
 We published a peer-reviewed article on the design and evaluation of this library:
 

From 9eb0f9e219b20efd454e941d1ede9f0eda146aac Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Wed, 15 Mar 2023 19:05:05 -0400
Subject: [PATCH 092/162] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a5df08728..b214a19d6 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ Bitsets, also called bitmaps, are commonly used as fast data structures. Unfortu
 
 Roaring bitmaps are compressed bitmaps which tend to outperform conventional compressed bitmaps such as WAH, EWAH or Concise.
 They are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
-[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas],  [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [InfluxDB](https://www.influxdata.com), [Pilosa][pilosa], [Bleve](http://www.blevesearch.com), [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The CRoaring library is used in several systems such as [Apache Doris](http://doris.incubator.apache.org) and [Twitter Pelican](https://github.com/twitter/pelikan). The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing.
+[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas],  [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [InfluxDB](https://www.influxdata.com), [Pilosa][pilosa], [Bleve](http://www.blevesearch.com), [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The CRoaring library is used in several systems such as [Apache Doris](http://doris.incubator.apache.org). The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing.
 
 We published a peer-reviewed article on the design and evaluation of this library:
 

From f40ed52bcdd635840a79877cef4857315dba817c Mon Sep 17 00:00:00 2001
From: Gabriela Gutierrez <gabigutierrez@google.com>
Date: Thu, 16 Mar 2023 20:16:25 +0000
Subject: [PATCH 093/162] Set minimum permissions to workflows (#450)

* Set minimum permissions for alpine.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Set minimum permissions for cifuzz.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Set minimum permissions for codeql.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Adjust minimum permissions to documentation.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Set minimum permissions to macos-ci.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Set minimum permissions for s390x.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Set minimum permissions for ubuntu-18-ci.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Set minimum permissions for ubuntu-ci.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Set minimum permissions for ubuntu-debug-sani-ci.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Set minimum permissions for ubuntu-gcc10-ci.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Set minimum permissions for ubuntu-legacy-ci.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Set minimum permissions for ubuntu-noexcept-ci.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Set minimum permissions for ubuntu-oldclang-18-ci.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Set minimum permissions for ubuntu-sani-ci.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Set minimum permissions for vs16-arm-ci.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Set minimum permissions for vs16-ci.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Set minimum permissions for vs17-arm-ci.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Set minimum permissions for vs17-ci.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Set minimum permissions for vs17-clang-ci.yml

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

---------

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>
---
 .github/workflows/alpine.yml                | 4 +++-
 .github/workflows/cifuzz.yml                | 2 ++
 .github/workflows/codeql.yml                | 3 +++
 .github/workflows/documentation.yml         | 8 +++++---
 .github/workflows/macos-ci.yml              | 2 ++
 .github/workflows/s390x.yml                 | 3 +++
 .github/workflows/ubuntu-18-ci.yml          | 2 ++
 .github/workflows/ubuntu-ci.yml             | 2 ++
 .github/workflows/ubuntu-debug-sani-ci.yml  | 2 ++
 .github/workflows/ubuntu-gcc10-ci.yml       | 2 ++
 .github/workflows/ubuntu-legacy-ci.yml      | 2 ++
 .github/workflows/ubuntu-noexcept-ci.yml    | 2 ++
 .github/workflows/ubuntu-oldclang-18-ci.yml | 2 ++
 .github/workflows/ubuntu-sani-ci.yml        | 2 ++
 .github/workflows/vs16-arm-ci.yml           | 3 +++
 .github/workflows/vs16-ci.yml               | 3 +++
 .github/workflows/vs17-arm-ci.yml           | 5 ++++-
 .github/workflows/vs17-ci.yml               | 3 +++
 .github/workflows/vs17-clang-ci.yml         | 5 ++++-
 19 files changed, 51 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/alpine.yml b/.github/workflows/alpine.yml
index 95491609c..bacf210c3 100644
--- a/.github/workflows/alpine.yml
+++ b/.github/workflows/alpine.yml
@@ -2,6 +2,8 @@ name: Alpine Linux
 'on':
   - push
   - pull_request
+permissions:
+  contents: read
 jobs:
   ubuntu-build:
     runs-on: ubuntu-latest
@@ -33,4 +35,4 @@ jobs:
           ./alpine.sh cmake --build build_for_alpine_debug
       - name: testdebug
         run: |
-          ./alpine.sh bash -c "cd build_for_alpine_debug && ctest"
\ No newline at end of file
+          ./alpine.sh bash -c "cd build_for_alpine_debug && ctest"
diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml
index 94bfd479d..3539dbf1f 100644
--- a/.github/workflows/cifuzz.yml
+++ b/.github/workflows/cifuzz.yml
@@ -1,5 +1,7 @@
 name: CIFuzz
 on: [pull_request]
+permissions:
+  contents: read
 jobs:
   Fuzzing:
     runs-on: ubuntu-latest
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 6d19b4e62..c02d8abdc 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -8,6 +8,9 @@ on:
   schedule:
     - cron: "39 2 * * 6"
 
+permissions:
+  contents: read
+
 jobs:
   analyze:
     name: Analyze
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index 54e515a6f..72888ae4f 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -8,9 +8,7 @@ on:
   workflow_dispatch:
 
 permissions:
-  contents: write
-  pages: write
-  id-token: write
+  contents: read
 
 jobs:
   deploy:
@@ -18,6 +16,10 @@ jobs:
       name: github-pages
       url: ${{ steps.deployment.outputs.page_url }}
     runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pages: write
+      id-token: write
     steps:
       - uses: actions/checkout@v3
       - name: Install Doxygen
diff --git a/.github/workflows/macos-ci.yml b/.github/workflows/macos-ci.yml
index d926d172f..4edb1de29 100644
--- a/.github/workflows/macos-ci.yml
+++ b/.github/workflows/macos-ci.yml
@@ -4,6 +4,8 @@ name: Macos-CI
   - push
   - pull_request
 
+permissions:
+  contents: read
 
 jobs:
   ci:
diff --git a/.github/workflows/s390x.yml b/.github/workflows/s390x.yml
index d49858218..4822c4ea9 100644
--- a/.github/workflows/s390x.yml
+++ b/.github/workflows/s390x.yml
@@ -8,6 +8,9 @@ on:
     branches:
       - master
 
+permissions:
+  contents: read
+
 jobs:
   build:
     runs-on: ubuntu-latest
diff --git a/.github/workflows/ubuntu-18-ci.yml b/.github/workflows/ubuntu-18-ci.yml
index 554951fe2..225533734 100644
--- a/.github/workflows/ubuntu-18-ci.yml
+++ b/.github/workflows/ubuntu-18-ci.yml
@@ -4,6 +4,8 @@ name: Ubuntu-18-CI
   - push
   - pull_request
 
+permissions:
+  contents: read
 
 jobs:
   ci:
diff --git a/.github/workflows/ubuntu-ci.yml b/.github/workflows/ubuntu-ci.yml
index 82956cca6..247bdf89c 100644
--- a/.github/workflows/ubuntu-ci.yml
+++ b/.github/workflows/ubuntu-ci.yml
@@ -4,6 +4,8 @@ name: Ubuntu-CI
   - push
   - pull_request
 
+permissions:
+  contents: read
 
 jobs:
   ci:
diff --git a/.github/workflows/ubuntu-debug-sani-ci.yml b/.github/workflows/ubuntu-debug-sani-ci.yml
index 52848d873..e828b6220 100644
--- a/.github/workflows/ubuntu-debug-sani-ci.yml
+++ b/.github/workflows/ubuntu-debug-sani-ci.yml
@@ -4,6 +4,8 @@ name: Ubuntu-Debug-Sanitized-CI
   - push
   - pull_request
 
+permissions:
+  contents: read
 
 jobs:
   ci:
diff --git a/.github/workflows/ubuntu-gcc10-ci.yml b/.github/workflows/ubuntu-gcc10-ci.yml
index d617f4b43..1f976ddc7 100644
--- a/.github/workflows/ubuntu-gcc10-ci.yml
+++ b/.github/workflows/ubuntu-gcc10-ci.yml
@@ -4,6 +4,8 @@ name: Ubuntu-GCC10-CI
   - push
   - pull_request
 
+permissions:
+  contents: read
 
 jobs:
   ci:
diff --git a/.github/workflows/ubuntu-legacy-ci.yml b/.github/workflows/ubuntu-legacy-ci.yml
index fd13acabb..940fe08b1 100644
--- a/.github/workflows/ubuntu-legacy-ci.yml
+++ b/.github/workflows/ubuntu-legacy-ci.yml
@@ -4,6 +4,8 @@ name: Ubuntu-CI
   - push
   - pull_request
 
+permissions:
+  contents: read
 
 jobs:
   ci:
diff --git a/.github/workflows/ubuntu-noexcept-ci.yml b/.github/workflows/ubuntu-noexcept-ci.yml
index 1b5382000..bf5dc7385 100644
--- a/.github/workflows/ubuntu-noexcept-ci.yml
+++ b/.github/workflows/ubuntu-noexcept-ci.yml
@@ -4,6 +4,8 @@ name: Ubuntu-CI
   - push
   - pull_request
 
+permissions:
+  contents: read
 
 jobs:
   ci:
diff --git a/.github/workflows/ubuntu-oldclang-18-ci.yml b/.github/workflows/ubuntu-oldclang-18-ci.yml
index 8f6e50dde..8fc5825d9 100644
--- a/.github/workflows/ubuntu-oldclang-18-ci.yml
+++ b/.github/workflows/ubuntu-oldclang-18-ci.yml
@@ -4,6 +4,8 @@ name: Ubuntu-CI (old llvm)
   - push
   - pull_request
 
+permissions:
+  contents: read
 
 jobs:
   ci:
diff --git a/.github/workflows/ubuntu-sani-ci.yml b/.github/workflows/ubuntu-sani-ci.yml
index c955b7e85..d3ecaaf9b 100644
--- a/.github/workflows/ubuntu-sani-ci.yml
+++ b/.github/workflows/ubuntu-sani-ci.yml
@@ -4,6 +4,8 @@ name: Ubuntu-Sanitized-CI
   - push
   - pull_request
 
+permissions:
+  contents: read
 
 jobs:
   ci:
diff --git a/.github/workflows/vs16-arm-ci.yml b/.github/workflows/vs16-arm-ci.yml
index 79017bd76..7af16c77e 100644
--- a/.github/workflows/vs16-arm-ci.yml
+++ b/.github/workflows/vs16-arm-ci.yml
@@ -2,6 +2,9 @@ name: VS16-ARM-CI
 
 on: [push, pull_request]
 
+permissions:
+  contents: read
+
 jobs:
   ci:
     name: windows-vs16
diff --git a/.github/workflows/vs16-ci.yml b/.github/workflows/vs16-ci.yml
index de40efc81..3d52fc298 100644
--- a/.github/workflows/vs16-ci.yml
+++ b/.github/workflows/vs16-ci.yml
@@ -2,6 +2,9 @@ name: VS16-CI
 
 on: [push, pull_request]
 
+permissions:
+  contents: read
+
 jobs:
   ci:
     name: windows-vs16
diff --git a/.github/workflows/vs17-arm-ci.yml b/.github/workflows/vs17-arm-ci.yml
index bb0532003..db087ca79 100644
--- a/.github/workflows/vs17-arm-ci.yml
+++ b/.github/workflows/vs17-arm-ci.yml
@@ -2,6 +2,9 @@ name: VS17-ARM-CI
 
 on: [push, pull_request]
 
+permissions:
+  contents: read
+
 jobs:
   ci:
     name: windows-vs17
@@ -18,4 +21,4 @@ jobs:
       - name: Use cmake
         run: |
           cmake -A ${{ matrix.arch }} -DCMAKE_CROSSCOMPILING=1 -B build  &&
-          cmake --build build --verbose
\ No newline at end of file
+          cmake --build build --verbose
diff --git a/.github/workflows/vs17-ci.yml b/.github/workflows/vs17-ci.yml
index 1cfc0b652..bad4fda01 100644
--- a/.github/workflows/vs17-ci.yml
+++ b/.github/workflows/vs17-ci.yml
@@ -2,6 +2,9 @@ name: VS17-CI
 
 on: [push, pull_request]
 
+permissions:
+  contents: read
+
 jobs:
   ci:
     name: windows-vs17
diff --git a/.github/workflows/vs17-clang-ci.yml b/.github/workflows/vs17-clang-ci.yml
index 9beb6d320..49c97b46a 100644
--- a/.github/workflows/vs17-clang-ci.yml
+++ b/.github/workflows/vs17-clang-ci.yml
@@ -2,6 +2,9 @@ name: VS17-CLANG-CI
 
 on: [push, pull_request]
 
+permissions:
+  contents: read
+
 jobs:
   ci:
     name: windows-vs17
@@ -30,4 +33,4 @@ jobs:
       - name: Run basic tests in Debug
         run: |
           cd build
-          ctest -C Debug --output-on-failure 
\ No newline at end of file
+          ctest -C Debug --output-on-failure 

From 4d406555aee923f9a153325aba2ab7e638c0da75 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 20 Mar 2023 14:42:03 -0400
Subject: [PATCH 094/162] Update codeql.yml

---
 .github/workflows/codeql.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index c02d8abdc..8834c90c8 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -33,7 +33,6 @@ jobs:
         uses: github/codeql-action/init@v2
         with:
           languages: ${{ matrix.language }}
-          queries: +security-and-quality
 
       - name: Autobuild
         uses: github/codeql-action/autobuild@v2

From c69b17ceb74d808274a04d0466cabfffb1aa20ed Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Fri, 24 Mar 2023 13:25:29 -0400
Subject: [PATCH 095/162] add avx512 support (#451)

* add avx512 optimization

* support bmi2 when use avx

* support bmi2 when use avx

* update

* Minor tweaks

* Guarding bitset_extract_setbits_avx512_uint16

* We use 'hamming' in this library.

* Compiler guarding.

* Fixes

* Tweaking.

* Documenting AVX-512 throttling.

* Adding missing headers.

---------

Co-authored-by: Hui Han <hui.han@intel.com>
---
 README.md                           |   8 +-
 include/roaring/bitset_util.h       | 126 +++++++++++++
 include/roaring/isadetection.h      | 142 +++++++++-----
 include/roaring/misc/configreport.h |  67 ++++++-
 include/roaring/portability.h       |  21 ++-
 src/array_util.c                    |  65 +++++++
 src/bitset_util.c                   | 111 +++++++++++
 src/containers/array.c              |   2 +-
 src/containers/bitset.c             | 280 +++++++++++++++++++++++++++-
 src/containers/convert.c            |  24 ++-
 src/containers/run.c                |  50 ++++-
 tools/cmake/FindOptions.cmake       |   8 +
 12 files changed, 839 insertions(+), 65 deletions(-)

diff --git a/README.md b/README.md
index b214a19d6..d674e8f0e 100644
--- a/README.md
+++ b/README.md
@@ -544,7 +544,7 @@ To build with at least Visual Studio 2017 directly in the IDE:
 - For testing, in the Standard toolbar, drop the ``Select Startup Item...`` menu and choose one of the tests. Run the test by pressing the button to the left of the dropdown.
 
 
-We have optimizations specific to AVX2 in the code, and they are turned dynamically based on the detected hardware at runtime.
+We have optimizations specific to AVX2 and AVX-512 in the code, and they are turned on dynamically based on the detected hardware at runtime.
 
 
 ## Usage (Using `conan`)
@@ -587,9 +587,11 @@ These commands will also print out instructions on how to use the library from M
 
 If you find the version of `roaring` shipped with `vcpkg` is out-of-date, feel free to report it to `vcpkg` community either by submiting an issue or by creating a PR.
 
-# AVX2-related throttling
+# SIMD-related throttling
 
-Our AVX2 code does not use floating-point numbers or multiplications, so it is not subject to turbo frequency throttling on many-core Intel processors.
+Our AVX2 code does not use floating-point numbers or multiplications, so it is not subject to turbo frequency throttling on many-core Intel processors. 
+
+Our AVX-512 code is only enabled on recent hardware (Intel Ice Lake or better and AMD Zen 4) where SIMD-specific frequency throttling is not observed.
 
 # Thread safety
 
diff --git a/include/roaring/bitset_util.h b/include/roaring/bitset_util.h
index 0eea94edd..9da26f463 100644
--- a/include/roaring/bitset_util.h
+++ b/include/roaring/bitset_util.h
@@ -154,6 +154,9 @@ size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
                                    uint32_t *out, size_t outcapacity,
                                    uint32_t base);
 
+size_t bitset_extract_setbits_avx512(const uint64_t *words, size_t length, 
+                                   uint32_t *out, size_t outcapacity, 
+                                   uint32_t base);
 /*
  * Given a bitset containing "length" 64-bit words, write out the position
  * of all the set bits to "out", values start at "base".
@@ -186,6 +189,10 @@ size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length,
                                          uint16_t *out, size_t outcapacity,
                                          uint16_t base);
 
+size_t bitset_extract_setbits_avx512_uint16(const uint64_t *words, size_t length,
+                                         uint16_t *out, size_t outcapacity, 
+                                         uint16_t base);
+
 /*
  * Given a bitset containing "length" 64-bit words, write out the position
  * of all the set bits to "out",  values start at "base"
@@ -569,6 +576,125 @@ CROARING_TARGET_AVX2
 AVXPOPCNTFNC(andnot, _mm256_andnot_si256)
 CROARING_UNTARGET_REGION
 
+
+#define VPOPCNT_AND_ADD(ptr, i, accu)   \
+    const __m512i v##i = _mm512_loadu_si512((const __m512i*)ptr + i);  \
+    const __m512i p##i = _mm512_popcnt_epi64(v##i);    \
+    accu = _mm512_add_epi64(accu, p##i);  
+
+#if CROARING_COMPILER_SUPPORTS_AVX512
+CROARING_TARGET_AVX512
+static inline uint64_t sum_epu64_256(const __m256i v) {
+    // Horizontal add: returns the sum of the four 64-bit lanes of v.
+    return (uint64_t)(_mm256_extract_epi64(v, 0))
+         + (uint64_t)(_mm256_extract_epi64(v, 1))
+         + (uint64_t)(_mm256_extract_epi64(v, 2))
+         + (uint64_t)(_mm256_extract_epi64(v, 3));
+}
+
+
+static inline uint64_t simd_sum_epu64(const __m512i v) {
+    // Horizontal add of v: split it into its two 256-bit halves and sum each half.
+     __m256i lo = _mm512_extracti64x4_epi64(v, 0);
+     __m256i hi = _mm512_extracti64x4_epi64(v, 1);
+
+    return sum_epu64_256(lo) + sum_epu64_256(hi);
+}
+
+static inline uint64_t avx512_vpopcount(const __m512i* data, const uint64_t size)
+{   // Returns the total number of set bits in the `size` 512-bit vectors starting at `data`.
+    const uint64_t limit = size - size % 4;
+    __m512i total = _mm512_setzero_si512();
+    uint64_t i = 0;
+    // Unrolled loop: four vectors per iteration, per-lane popcounts are added into total.
+    for (; i < limit; i += 4)
+    {    
+        VPOPCNT_AND_ADD(data + i, 0, total);
+        VPOPCNT_AND_ADD(data + i, 1, total);
+        VPOPCNT_AND_ADD(data + i, 2, total);
+        VPOPCNT_AND_ADD(data + i, 3, total);
+    }
+    // Tail: the remaining (size % 4) vectors, one at a time.
+    for (; i < size; i++)
+    {
+        total = _mm512_add_epi64(total, _mm512_popcnt_epi64(_mm512_loadu_si512(data + i)));
+    }
+    // Reduce the 64-bit lane counters of total to a single scalar.
+    return simd_sum_epu64(total);
+}
+CROARING_UNTARGET_REGION
+#endif
+
+#define AVXPOPCNTFNC512(opname, avx_intrinsic)                                 \
+    static inline uint64_t avx512_harley_seal_popcount512_##opname(            \
+        const __m512i *data1, const __m512i *data2, const uint64_t size) {     \
+        __m512i total = _mm512_setzero_si512();                                \
+        const uint64_t limit = size - size % 4;                                \
+        uint64_t i = 0;                                                        \
+	    for (; i < limit; i += 4) {                                        \
+            __m512i a1 = avx_intrinsic(_mm512_loadu_si512(data1 + i),          \
+                                       _mm512_loadu_si512(data2 + i));         \
+            total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a1));          \
+            __m512i a2 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 1),      \
+                                       _mm512_loadu_si512(data2 + i + 1));     \
+            total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a2));          \
+             __m512i a3 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 2),     \
+                                       _mm512_loadu_si512(data2 + i + 2));     \
+            total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a3));          \
+             __m512i a4 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 3),     \
+                                       _mm512_loadu_si512(data2 + i + 3));     \
+            total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a4));          \
+       }                                                                       \
+       for(; i < size; i++) {                                                  \
+              __m512i a = avx_intrinsic(_mm512_loadu_si512(data1 + i),         \
+                       _mm512_loadu_si512(data2 + i));                         \
+              total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a));         \
+        }                                                                      \
+        return simd_sum_epu64(total);                                          \
+    }                                                                          \
+    static inline uint64_t avx512_harley_seal_popcount512andstore_##opname(    \
+        const __m512i *__restrict__ data1, const __m512i *__restrict__ data2,  \
+        __m512i *__restrict__ out, const uint64_t size) {                      \
+        __m512i total = _mm512_setzero_si512();                                \
+        const uint64_t limit = size - size % 4;                                \
+        uint64_t i = 0;                                                        \
+	    for (; i < limit; i += 4) {                                        \
+            __m512i a1 = avx_intrinsic(_mm512_loadu_si512(data1 + i),          \
+                                       _mm512_loadu_si512(data2 + i));         \
+            _mm512_storeu_si512(out + i, a1);                                  \
+            total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a1));          \
+            __m512i a2 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 1),      \
+                                       _mm512_loadu_si512(data2 + i + 1));     \
+            _mm512_storeu_si512(out + i + 1, a2);                              \
+            total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a2));          \
+             __m512i a3 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 2),     \
+                                       _mm512_loadu_si512(data2 + i + 2));     \
+            _mm512_storeu_si512(out + i + 2, a3);                              \
+            total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a3));          \
+            __m512i a4 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 3),      \
+                                       _mm512_loadu_si512(data2 + i + 3));     \
+            _mm512_storeu_si512(out + i + 3, a4);                              \
+            total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a4));          \
+       }                                                                       \
+       for(; i < size; i++) {                                                  \
+              __m512i a = avx_intrinsic(_mm512_loadu_si512(data1 + i),         \
+                       _mm512_loadu_si512(data2 + i));                         \
+            _mm512_storeu_si512(out + i, a);                                   \
+ 	       total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a));        \
+        }                                                                      \
+        return simd_sum_epu64(total);                                          \
+    }                                                                          \
+
+#if CROARING_COMPILER_SUPPORTS_AVX512
+CROARING_TARGET_AVX512
+AVXPOPCNTFNC512(or, _mm512_or_si512)
+AVXPOPCNTFNC512(union, _mm512_or_si512)
+AVXPOPCNTFNC512(and, _mm512_and_si512)
+AVXPOPCNTFNC512(intersection, _mm512_and_si512)
+AVXPOPCNTFNC512(xor, _mm512_xor_si512)
+AVXPOPCNTFNC512(andnot, _mm512_andnot_si512)
+CROARING_UNTARGET_REGION
+#endif
 /***
  * END Harley-Seal popcount functions.
  */
diff --git a/include/roaring/isadetection.h b/include/roaring/isadetection.h
index 69c25a073..39d1d6621 100644
--- a/include/roaring/isadetection.h
+++ b/include/roaring/isadetection.h
@@ -52,6 +52,22 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <stdbool.h>
 #include <stdlib.h>
 
+
+#ifdef __has_include
+// We want to make sure that the AVX-512 functions are only built on compilers
+// fully supporting AVX-512.
+#if __has_include(<avx512vbmi2intrin.h>)
+#define CROARING_COMPILER_SUPPORTS_AVX512 1
+#endif
+#endif
+
+// Visual Studio 2019 and up support AVX-512
+#ifdef _MSC_VER
+#if _MSC_VER >= 1920
+#define CROARING_COMPILER_SUPPORTS_AVX512 1
+#endif
+#endif
+
 // We need portability.h to be included first, see
 // https://github.com/RoaringBitmap/CRoaring/issues/394
 #include <roaring/portability.h>
@@ -71,34 +87,19 @@ enum croaring_instruction_set {
   CROARING_BMI1 = 0x20,
   CROARING_BMI2 = 0x40,
   CROARING_ALTIVEC = 0x80,
+  CROARING_AVX512F = 0x100,
+  CROARING_AVX512DQ = 0x200,
+  CROARING_AVX512BW = 0x400,
+  CROARING_AVX512VBMI2 = 0x800,
+  CROARING_AVX512BITALG = 0x1000,
+  CROARING_AVX512VPOPCNTDQ = 0x2000,
   CROARING_UNINITIALIZED = 0x8000
 };
 
-#if defined(__PPC64__)
-
-//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
-//  return CROARING_ALTIVEC;
-//}
-
-#elif defined(__arm__) || defined(__aarch64__) // incl. armel, armhf, arm64
-
-#if defined(__ARM_NEON)
-
-//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
-//  return CROARING_NEON;
-//}
-
-#else // ARM without NEON
-
-//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
-//  return CROARING_DEFAULT;
-//}
-
-#endif
-
-#elif defined(__x86_64__) || defined(_M_AMD64) // x64
+static unsigned int CROARING_AVX512_REQUIRED = (CROARING_AVX512F | CROARING_AVX512DQ | CROARING_AVX512BW | CROARING_AVX512VBMI2 | CROARING_AVX512BITALG | CROARING_AVX512VPOPCNTDQ);
 
 
+#if defined(__x86_64__) || defined(_M_AMD64) // x64
 
 
 static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
@@ -124,6 +125,11 @@ static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
 #endif
 }
 
+/**
+ * This is a relatively expensive function but it will get called at most
+ * *once* per compilation unit. Normally, the CRoaring library is built
+ * as one compilation unit.
+ */
 static inline uint32_t dynamic_croaring_detect_supported_architectures() {
   uint32_t eax, ebx, ecx, edx;
   uint32_t host_isa = 0x0;
@@ -131,6 +137,12 @@ static inline uint32_t dynamic_croaring_detect_supported_architectures() {
   static uint32_t cpuid_avx2_bit = 1 << 5;      ///< @private Bit 5 of EBX for EAX=0x7
   static uint32_t cpuid_bmi1_bit = 1 << 3;      ///< @private bit 3 of EBX for EAX=0x7
   static uint32_t cpuid_bmi2_bit = 1 << 8;      ///< @private bit 8 of EBX for EAX=0x7
+  static uint32_t cpuid_avx512f_bit = 1 << 16;  ///< @private bit 16 of EBX for EAX=0x7
+  static uint32_t cpuid_avx512dq_bit = 1 << 17; ///< @private bit 17 of EBX for EAX=0x7
+  static uint32_t cpuid_avx512bw_bit = 1 << 30; ///< @private bit 30 of EBX for EAX=0x7
+  static uint32_t cpuid_avx512vbmi2_bit = 1 << 6; ///< @private bit 6 of ECX for EAX=0x7
+  static uint32_t cpuid_avx512bitalg_bit = 1 << 12; ///< @private bit 12 of ECX for EAX=0x7
+  static uint32_t cpuid_avx512vpopcntdq_bit = 1 << 14; ///< @private bit 14 of ECX for EAX=0x7
   static uint32_t cpuid_sse42_bit = 1 << 20;    ///< @private bit 20 of ECX for EAX=0x1
   static uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit  1 of ECX for EAX=0x1
   // ECX for EAX=0x7
@@ -147,7 +159,31 @@ static inline uint32_t dynamic_croaring_detect_supported_architectures() {
   if (ebx & cpuid_bmi2_bit) {
     host_isa |= CROARING_BMI2;
   }
-
+  
+  if (ebx & cpuid_avx512f_bit) {
+    host_isa |= CROARING_AVX512F;
+  }
+  
+  if (ebx & cpuid_avx512bw_bit) {
+    host_isa |= CROARING_AVX512BW;
+  }
+  
+  if (ebx & cpuid_avx512dq_bit) {
+    host_isa |= CROARING_AVX512DQ;
+  }
+  
+  if (ecx & cpuid_avx512vbmi2_bit) {
+    host_isa |= CROARING_AVX512VBMI2;
+  }
+  
+  if (ecx & cpuid_avx512bitalg_bit) {
+    host_isa |= CROARING_AVX512BITALG;
+  }
+  
+  if (ecx & cpuid_avx512vpopcntdq_bit) {
+    host_isa |= CROARING_AVX512VPOPCNTDQ;
+  }
+  
   // EBX for EAX=0x1
   eax = 0x1;
   cpuid(&eax, &ebx, &ecx, &edx);
@@ -162,13 +198,6 @@ static inline uint32_t dynamic_croaring_detect_supported_architectures() {
 
   return host_isa;
 }
-#else // fallback
-
-
-//static inline uint32_t dynamic_croaring_detect_supported_architectures() {
-//  return CROARING_DEFAULT;
-//}
-
 
 #endif // end SIMD extension detection code
 
@@ -207,27 +236,56 @@ static inline uint32_t croaring_detect_supported_architectures() {
 static inline bool croaring_avx2() {
   return false;
 }
+static inline bool croaring_avx512() {
+  return false;
+}
+#elif defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VBMI2__) && defined(__AVX512BITALG__) && defined(__AVX512VPOPCNTDQ__)
+static inline bool croaring_avx2() {
+  return true;
+}
+static inline bool croaring_avx512() {
+  return true;
+}
 #elif defined(__AVX2__)
 static inline bool croaring_avx2() {
   return true;
 }
+static inline bool croaring_avx512() {
+#if CROARING_COMPILER_SUPPORTS_AVX512
+  // Even though we have set __AVX2__ at compile-time, it is still possible for the hardware
+  // to support AVX-512. By setting __AVX2__, all we are saying is that croaring_avx2() must be true!
+  static bool avx512_support = false;
+
+  if( !avx512_support )
+  {
+      avx512_support = ( (croaring_detect_supported_architectures() & CROARING_AVX512_REQUIRED)
+	                        == CROARING_AVX512_REQUIRED);
+  }
+  return avx512_support;
+#else
+  return false;
+#endif
+}
 #else
 static inline bool croaring_avx2() {
   return  (croaring_detect_supported_architectures() & CROARING_AVX2) == CROARING_AVX2;
 }
+static inline bool croaring_avx512() {
+#if CROARING_COMPILER_SUPPORTS_AVX512
+  static bool avx512_support = false;
+
+  if( !avx512_support )
+  {
+      avx512_support = ( (croaring_detect_supported_architectures() & CROARING_AVX512_REQUIRED)
+	                        == CROARING_AVX512_REQUIRED);
+  }
+  return avx512_support;
+#else
+  return false;
+#endif
+}
 #endif
 
-
-#else // defined(__x86_64__) || defined(_M_AMD64) // x64
-
-//static inline bool croaring_avx2() {
-//  return false;
-//}
-
-//static inline uint32_t croaring_detect_supported_architectures() {
-//    // no runtime dispatch
-//    return dynamic_croaring_detect_supported_architectures();
-//}
 #endif // defined(__x86_64__) || defined(_M_AMD64) // x64
 
 #endif // ROARING_ISADETECTION_H
diff --git a/include/roaring/misc/configreport.h b/include/roaring/misc/configreport.h
index a6a64285e..01974a77b 100644
--- a/include/roaring/misc/configreport.h
+++ b/include/roaring/misc/configreport.h
@@ -1,8 +1,9 @@
 /*
  * configreport.h
- *
+ * If this gets compiled into a different compilation unit than the CRoaring library,
+ * the functions croaring_avx512() and croaring_avx2() *may* trigger an additional
+ * call to dynamic_croaring_detect_supported_architectures().
  */
-
 #ifndef INCLUDE_MISC_CONFIGREPORT_H_
 #define INCLUDE_MISC_CONFIGREPORT_H_
 
@@ -109,8 +110,55 @@ static inline const char *guessprocessor() {
         case 0x016C:
             codename = "Pineview";
             break;
+        case 0x706e:
+        case 0x606a:
+            codename = "Icelake";
+            break;
+        case 0x706a:
+        case 0x506c:
+            codename = "Goldmont";
+            break;
+       case 0x806c:
+       case 0x806d:
+            codename = "TigerLake";
+            break;
+        case 0x806e:
+        case 0x906e:
+            codename = "Kabylake";
+            break;
+        case 0xa065:
+        case 0xa066:
+            codename = "Cometlake";
+            break;
+        case 0xa067:
+            codename = "Rocketlake";
+            break;
+        case 0x9067:
+        case 0x906a:
+            codename = "Alderlake";
+            break;
+        case 0xb067:
+            codename = "Raptorlake";
+            break;
+        case 0x30f1:
+        case 0x60f0:
+        case 0x70f1:
+        case 0x60f8:
+        case 0x90f0:
+            codename = "Zen2";
+            break;
+        case 0x20f10:
+        case 0x50f00:
+            codename = "Zen3";
+            break;
+        case 0x40f40:
+            codename = "Zen3+";
+            break;
+        case 0x60f10:
+            codename = "Zen4";
+            break;
         default:
-            codename = "UNKNOWN";
+            codename = "unknown";
             break;
     }
     return codename;
@@ -131,15 +179,24 @@ static inline void tellmeall() {
  #ifdef __AVX2__
     printf(" Building for AVX2\t");
  #endif
+    if(croaring_avx512()) {
+        printf( "AVX-512\t");
+    }
     if(croaring_avx2()) {
-        printf( "AVX2 usable\t");
+        printf( "AVX2\t");
     }
     if((config & CROARING_AVX2) == CROARING_AVX2) {
         printf( "AVX2 detected\t");
        if(!croaring_avx2()) {
          printf( "AVX2 not used\t");
        }
-     }
+    }
+    if((config & CROARING_AVX512_REQUIRED) == CROARING_AVX512_REQUIRED) {
+        printf( "AVX-512 detected\t");
+       if(!croaring_avx512()) { // hardware reports AVX-512 but this build does not use it
+         printf( "AVX-512 not used\t");
+       }
+    }
     if((config & CROARING_SSE42) == CROARING_SSE42) {
         printf(" SSE4.2 detected\t");
     }
diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index 15577c84a..e650466fc 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -126,6 +126,15 @@ extern "C" {  // portability definitions are in global scope, not a namespace
 #include <avxintrin.h>
 #include <avx2intrin.h>
 #include <wmmintrin.h>
+// Important: we need the AVX-512 headers:
+#include <avx512fintrin.h>
+#include <avx512dqintrin.h>
+#include <avx512cdintrin.h>
+#include <avx512bwintrin.h>
+#include <avx512vlintrin.h>
+#include <avx512vbmiintrin.h>
+#include <avx512vbmi2intrin.h>
+#include <avx512vpopcntdqintrin.h>
 // unfortunately, we may not get _blsr_u64, but, thankfully, clang
 // has it as a macro.
 #ifndef _blsr_u64
@@ -328,6 +337,7 @@ static inline int hamming(uint64_t x) {
 #endif
 
 #define CROARING_TARGET_AVX2 CROARING_TARGET_REGION("avx2,bmi,pclmul,lzcnt")
+#define CROARING_TARGET_AVX512 CROARING_TARGET_REGION("bmi2,avx512f,avx512dq,avx512bw,avx512vbmi2,avx512bitalg,avx512vpopcntdq")
 
 #ifdef __AVX2__
 // No need for runtime dispatching.
@@ -338,6 +348,15 @@ static inline int hamming(uint64_t x) {
 #define CROARING_UNTARGET_REGION
 #endif
 
+#if defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VBMI2__) && defined(__AVX512BITALG__) && defined(__AVX512VPOPCNTDQ__)
+// No need for runtime dispatching.
+// It is unnecessary and harmful to old clang to tag regions.
+#undef CROARING_TARGET_AVX512
+#define CROARING_TARGET_AVX512
+#undef CROARING_UNTARGET_REGION
+#define CROARING_UNTARGET_REGION
+#endif
+
 // Allow unaligned memory access
 #if defined(__GNUC__) || defined(__clang__)
 #define ALLOW_UNALIGNED __attribute__((no_sanitize("alignment")))
@@ -384,4 +403,4 @@ static inline int hamming(uint64_t x) {
 // be included, but not isadetection.h: the latter is a
 // strict requirement.
 #include <roaring/isadetection.h> // include it last!
-#endif /* INCLUDE_PORTABILITY_H_ */
\ No newline at end of file
+#endif /* INCLUDE_PORTABILITY_H_ */
diff --git a/src/array_util.c b/src/array_util.c
index 963c32490..9600ce1fd 100644
--- a/src/array_util.c
+++ b/src/array_util.c
@@ -1984,6 +1984,66 @@ size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *s
 #endif
 }
 #ifdef CROARING_IS_X64
+#if CROARING_COMPILER_SUPPORTS_AVX512
+CROARING_TARGET_AVX512
+static inline bool _avx512_memequals(const void *s1, const void *s2, size_t n) {
+    const uint8_t *ptr1 = (const uint8_t *)s1;
+    const uint8_t *ptr2 = (const uint8_t *)s2;
+    const uint8_t *end1 = ptr1 + n;
+    const uint8_t *end8 = ptr1 + ((n >> 3) << 3);
+    const uint8_t *end32 = ptr1 + ((n >> 5) << 5);
+    const uint8_t *end64 = ptr1 + ((n >> 6) << 6);
+    // Returns true iff the n bytes at s1 and s2 are equal. First pass: 64 bytes at a time.
+    while (ptr1 < end64){
+        __m512i r1 = _mm512_loadu_si512((const __m512i*)ptr1);
+        __m512i r2 = _mm512_loadu_si512((const __m512i*)ptr2);
+
+        uint64_t mask = _mm512_cmpeq_epi8_mask(r1, r2);
+        // One mask bit per byte; any cleared bit means a mismatching byte.
+        if (mask != UINT64_MAX) {
+           return false;
+        }
+
+        ptr1 += 64;
+        ptr2 += 64;
+
+    }
+    // Second pass: 32 bytes at a time with 256-bit compares.
+    while (ptr1 < end32) {
+        __m256i r1 = _mm256_loadu_si256((const __m256i*)ptr1);
+        __m256i r2 = _mm256_loadu_si256((const __m256i*)ptr2);
+        int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2));
+        if ((uint32_t)mask != UINT32_MAX) {
+            return false;
+        }
+        ptr1 += 32;
+        ptr2 += 32;
+    }
+    // Third pass: 8 bytes at a time; memcpy is used instead of dereferencing a uint64_t pointer.
+    while (ptr1 < end8) {
+	uint64_t v1, v2;
+        memcpy(&v1,ptr1,sizeof(uint64_t));
+        memcpy(&v2,ptr2,sizeof(uint64_t));
+        if (v1 != v2) {
+            return false;
+        }
+        ptr1 += 8;
+        ptr2 += 8;
+    }
+    // Last pass: the remaining bytes up to end1, one at a time.
+    while (ptr1 < end1) {
+        if (*ptr1 != *ptr2) {
+            return false;
+        }
+        ptr1++;
+        ptr2++;
+    }
+
+    return true;
+}
+CROARING_UNTARGET_REGION
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+
 CROARING_TARGET_AVX2
 static inline bool _avx2_memequals(const void *s1, const void *s2, size_t n) {
     const uint8_t *ptr1 = (const uint8_t *)s1;
@@ -2032,6 +2092,11 @@ bool memequals(const void *s1, const void *s2, size_t n) {
         return true;
     }
 #ifdef CROARING_IS_X64
+#if CROARING_COMPILER_SUPPORTS_AVX512
+    if( croaring_avx512() ) {
+      return _avx512_memequals(s1, s2, n);
+    } else
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
     if( croaring_avx2() ) {
       return _avx2_memequals(s1, s2, n);
     } else {
diff --git a/src/bitset_util.c b/src/bitset_util.c
index 6e3c0d7ba..f1a04dcec 100644
--- a/src/bitset_util.c
+++ b/src/bitset_util.c
@@ -553,6 +553,117 @@ static uint16_t vecDecodeTable_uint16[256][8] = {
 #endif
 
 #ifdef CROARING_IS_X64
+#if CROARING_COMPILER_SUPPORTS_AVX512
+CROARING_TARGET_AVX512
+const uint8_t vbmi2_table[64] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
+size_t bitset_extract_setbits_avx512(const uint64_t *words, size_t length, uint32_t *vout,
+                                   size_t outcapacity, uint32_t base) {
+    uint32_t *out = (uint32_t *)vout;
+    uint32_t *initout = out;
+    uint32_t *safeout = out + outcapacity;
+    __m512i base_v = _mm512_set1_epi32(base);    
+    __m512i index_table = _mm512_loadu_si512(vbmi2_table);
+    size_t i = 0;
+    // Vector path: each iteration stores 64 values, so it only runs while out + 64 < safeout.
+    for (; (i < length) && ((out + 64) < safeout); i += 1)
+    {
+        uint64_t v = words[i];		
+        __m512i vec = _mm512_maskz_compress_epi8(v, index_table);	
+        // vec: the table entries (bit indices 0..63) selected by the set bits of v, packed low.
+        uint8_t advance = hamming(v);
+        // Widen the byte indices to 32 bits, 16 at a time, and add base + i * 64.
+        __m512i vbase = _mm512_add_epi32(base_v, _mm512_set1_epi32(i * 64));
+        __m512i r1 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec,0));
+        __m512i r2 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec,1));
+        __m512i r3 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec,2));
+        __m512i r4 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec,3));
+        
+        r1 = _mm512_add_epi32(r1, vbase);
+        r2 = _mm512_add_epi32(r2, vbase);
+        r3 = _mm512_add_epi32(r3, vbase);
+        r4 = _mm512_add_epi32(r4, vbase);
+        _mm512_storeu_si512((__m512i *)out, r1);
+        _mm512_storeu_si512((__m512i *)(out + 16), r2);
+        _mm512_storeu_si512((__m512i *)(out + 32), r3);
+        _mm512_storeu_si512((__m512i *)(out + 48), r4);
+        // All 64 slots were written, but out only moves past the `advance` valid ones.
+        out += advance;
+        
+    }
+    // Scalar path for the words the vector loop did not consume; stops once out reaches safeout.
+    base += i * 64;
+    
+    for (; (i < length) && (out < safeout); ++i) {
+         uint64_t w = words[i];
+         while ((w != 0) && (out < safeout)) {
+             uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
+             int r = __builtin_ctzll(w); // on x64, should compile to TZCNT
+             uint32_t val = r + base;
+             memcpy(out, &val,
+                    sizeof(uint32_t));  // should be compiled as a MOV on x64
+             out++;
+             w ^= t;
+         }
+         base += 64;
+     }
+
+    // The return value is the number of positions written to vout.
+    return out - initout;
+
+}
+
+// Reference: https://lemire.me/blog/2022/05/10/faster-bitset-decoding-using-intel-avx-512/
+size_t bitset_extract_setbits_avx512_uint16(const uint64_t *array, size_t length,
+                                     uint16_t *vout, size_t capacity, uint16_t base) {
+    uint16_t *out = (uint16_t *)vout;
+    uint16_t *initout = out;
+    uint16_t *safeout = vout + capacity;
+    // 16-bit variant: writes the positions of the set bits of array[0..length) as uint16_t.
+    __m512i base_v = _mm512_set1_epi16(base);
+    __m512i index_table = _mm512_loadu_si512(vbmi2_table);
+    size_t i = 0;
+    // Vector path: each iteration stores 64 values, so it only runs while out + 64 < safeout.
+    for (; (i < length) && ((out + 64) < safeout); i++)
+    {
+        uint64_t v = array[i];
+        __m512i vec = _mm512_maskz_compress_epi8(v, index_table);
+        // vec: the table entries (bit indices 0..63) selected by the set bits of v, packed low.
+        uint8_t advance = hamming(v);
+        // Widen the byte indices to 16 bits, 32 at a time, and add base + i * 64.
+        __m512i vbase = _mm512_add_epi16(base_v, _mm512_set1_epi16(i * 64));
+        __m512i r1 = _mm512_cvtepi8_epi16(_mm512_extracti32x8_epi32(vec,0));
+        __m512i r2 = _mm512_cvtepi8_epi16(_mm512_extracti32x8_epi32(vec,1));
+
+        r1 = _mm512_add_epi16(r1, vbase);
+        r2 = _mm512_add_epi16(r2, vbase);
+        // All 64 slots are written, but out only moves past the `advance` valid ones.
+	    _mm512_storeu_si512((__m512i *)out, r1);
+        _mm512_storeu_si512((__m512i *)(out + 32), r2);
+        out += advance;
+
+    }
+    // Scalar path for the words the vector loop did not consume; stops once out reaches safeout.
+    base += i * 64;
+
+    for (; (i < length) && (out < safeout); ++i) {
+         uint64_t w = array[i];
+         while ((w != 0) && (out < safeout)) {
+             uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
+             int r = __builtin_ctzll(w); // on x64, should compile to TZCNT
+             uint32_t val = r + base;
+             memcpy(out, &val,
+                    sizeof(uint16_t)); // copies only the first two bytes of val
+             out++;
+             w ^= t;
+         }
+         base += 64;
+     }
+    // The return value is the number of positions written to vout.
+    return out - initout;
+}
+CROARING_UNTARGET_REGION
+#endif
+
 CROARING_TARGET_AVX2
 size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
                                    uint32_t *out, size_t outcapacity,
diff --git a/src/containers/array.c b/src/containers/array.c
index cb2957265..8e3c053f2 100644
--- a/src/containers/array.c
+++ b/src/containers/array.c
@@ -381,7 +381,7 @@ void array_container_intersection_inplace(array_container_t *src_1,
 #else
         src_1->cardinality = intersect_uint16(
                         src_1->array, card_1, src_2->array, card_2, src_1->array);
-#endif       
+#endif
     }
 }
 
diff --git a/src/containers/bitset.c b/src/containers/bitset.c
index 4eb21dd70..59874aeaa 100644
--- a/src/containers/bitset.c
+++ b/src/containers/bitset.c
@@ -53,9 +53,20 @@ bitset_container_t *bitset_container_create(void) {
     if (!bitset) {
         return NULL;
     }
-    // sizeof(__m256i) == 32
+
+    size_t align_size = 32;
+#ifdef CROARING_IS_X64
+    if ( croaring_avx512() ) {
+	    // sizeof(__m512i) == 64
+	    align_size = 64;
+    }
+    else {
+      // sizeof(__m256i) == 32
+	    align_size = 32;
+    }
+#endif
     bitset->words = (uint64_t *)roaring_aligned_malloc(
-        32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
+        align_size, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
     if (!bitset->words) {
         roaring_free(bitset);
         return NULL;
@@ -117,9 +128,20 @@ bitset_container_t *bitset_container_clone(const bitset_container_t *src) {
     if (!bitset) {
         return NULL;
     }
-    // sizeof(__m256i) == 32
+
+    size_t align_size = 32;
+#ifdef CROARING_IS_X64
+    if ( croaring_avx512() ) {
+	    // sizeof(__m512i) == 64
+	    align_size = 64;
+    }
+    else {
+      // sizeof(__m256i) == 32
+	    align_size = 32;
+    }
+#endif
     bitset->words = (uint64_t *)roaring_aligned_malloc(
-        32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
+        align_size, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
     if (!bitset->words) {
         roaring_free(bitset);
         return NULL;
@@ -218,6 +240,9 @@ bool bitset_container_intersect(const bitset_container_t *src_1,
 #ifndef WORDS_IN_AVX2_REG
 #define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
 #endif
+#ifndef WORDS_IN_AVX512_REG
+#define WORDS_IN_AVX512_REG sizeof(__m512i) / sizeof(uint64_t)
+#endif
 /* Get the number of bits set (force computation) */
 static inline int _scalar_bitset_container_compute_cardinality(const bitset_container_t *bitset) {
   const uint64_t *words = bitset->words;
@@ -232,6 +257,13 @@ static inline int _scalar_bitset_container_compute_cardinality(const bitset_cont
 }
 /* Get the number of bits set (force computation) */
 int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
+#if CROARING_COMPILER_SUPPORTS_AVX512
+    if( croaring_avx512() ) {
+      return (int) avx512_vpopcount(
+        (const __m512i *)bitset->words,
+        BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX512_REG));
+    } else
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
     if( croaring_avx2() ) {
       return (int) avx2_harley_seal_popcount256(
         (const __m256i *)bitset->words,
@@ -286,6 +318,167 @@ int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
 #ifdef CROARING_IS_X64
 
 #define BITSET_CONTAINER_FN_REPEAT 8
+#ifndef WORDS_IN_AVX512_REG
+#define WORDS_IN_AVX512_REG sizeof(__m512i) / sizeof(uint64_t)
+#endif // WORDS_IN_AVX512_REG
+/*#define LOOP_SIZE                    \
+    BITSET_CONTAINER_SIZE_IN_WORDS / \
+        ((WORDS_IN_AVX512_REG)*BITSET_CONTAINER_FN_REPEAT)
+*/
+/* Computes a binary operation (eg union) on bitset1 and bitset2 and write the
+   result to bitsetout */
+// clang-format off
+#define AVX512_BITSET_CONTAINER_FN1(before, opname, opsymbol, avx_intrinsic,   \
+                                neon_intrinsic, after)                         \
+  static inline int _avx512_bitset_container_##opname##_nocard(                \
+      const bitset_container_t *src_1, const bitset_container_t *src_2,        \
+      bitset_container_t *dst) {                                               \
+        const uint8_t * __restrict__ words_1 = (const uint8_t *)src_1->words;  \
+    const uint8_t * __restrict__ words_2 = (const uint8_t *)src_2->words;      \
+    /* not using the blocking optimization for some reason*/                   \
+    uint8_t *out = (uint8_t*)dst->words;                                       \
+    const int innerloop = 8;                                                   \
+    for (size_t i = 0;                                                         \
+        i < BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX512_REG);            \
+                                                         i+=innerloop) {       \
+        __m512i A1, A2, AO;                                                    \
+        A1 = _mm512_loadu_si512((const __m512i *)(words_1));                   \
+        A2 = _mm512_loadu_si512((const __m512i *)(words_2));                   \
+        AO = avx_intrinsic(A2, A1);                                            \
+        _mm512_storeu_si512((__m512i *)out, AO);                               \
+        A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 64));              \
+        A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 64));              \
+        AO = avx_intrinsic(A2, A1);                                            \
+        _mm512_storeu_si512((__m512i *)(out+64), AO);                          \
+        A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 128));             \
+        A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 128));             \
+        AO = avx_intrinsic(A2, A1);                                            \
+        _mm512_storeu_si512((__m512i *)(out+128), AO);                         \
+        A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 192));             \
+        A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 192));             \
+        AO = avx_intrinsic(A2, A1);                                            \
+        _mm512_storeu_si512((__m512i *)(out+192), AO);                         \
+        A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 256));             \
+        A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 256));             \
+        AO = avx_intrinsic(A2, A1);                                            \
+        _mm512_storeu_si512((__m512i *)(out+256), AO);                         \
+        A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 320));             \
+        A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 320));             \
+        AO = avx_intrinsic(A2, A1);                                            \
+        _mm512_storeu_si512((__m512i *)(out+320), AO);                         \
+        A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 384));             \
+        A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 384));             \
+        AO = avx_intrinsic(A2, A1);                                            \
+        _mm512_storeu_si512((__m512i *)(out+384), AO);                         \
+        A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 448));             \
+        A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 448));             \
+        AO = avx_intrinsic(A2, A1);                                     \
+        _mm512_storeu_si512((__m512i *)(out+448), AO);                  \
+        out+=512;                                                       \
+        words_1 += 512;                                                 \
+        words_2 += 512;                                                 \
+    }                                                                   \
+    dst->cardinality = BITSET_UNKNOWN_CARDINALITY;                      \
+    return dst->cardinality;                                            \
+  }
+
+#define AVX512_BITSET_CONTAINER_FN2(before, opname, opsymbol, avx_intrinsic,           \
+                                neon_intrinsic, after)                                 \
+  /* next, a version that updates cardinality*/                                        \
+  static inline int _avx512_bitset_container_##opname(const bitset_container_t *src_1, \
+                                      const bitset_container_t *src_2,                 \
+                                      bitset_container_t *dst) {                       \
+    const __m512i * __restrict__ words_1 = (const __m512i *) src_1->words;             \
+    const __m512i * __restrict__ words_2 = (const __m512i *) src_2->words;             \
+    __m512i *out = (__m512i *) dst->words;                                             \
+    dst->cardinality = (int32_t)avx512_harley_seal_popcount512andstore_##opname(words_2,\
+				words_1, out,BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX512_REG));           \
+    return dst->cardinality;                                                            \
+  }
+
+#define AVX512_BITSET_CONTAINER_FN3(before, opname, opsymbol, avx_intrinsic,            \
+                                neon_intrinsic, after)                                  \
+  /* next, a version that just computes the cardinality*/                               \
+  static inline int _avx512_bitset_container_##opname##_justcard(                       \
+      const bitset_container_t *src_1, const bitset_container_t *src_2) {               \
+    const __m512i * __restrict__ data1 = (const __m512i *) src_1->words;                \
+    const __m512i * __restrict__ data2 = (const __m512i *) src_2->words;                \
+    return (int)avx512_harley_seal_popcount512_##opname(data2,                          \
+				data1, BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX512_REG));                 \
+  }
+
+
+// we duplicate the function because other containers use the "or" term, makes API more consistent
+#if CROARING_COMPILER_SUPPORTS_AVX512
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, or,    |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, union, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+
+// "and"/"intersection" are aliases (API consistency); FN1 applies avx_intrinsic(A2, A1), so it must match the operation
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, and,          &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+// andnot: _mm512_andnot_si512(A2, A1) computes (~A2) & A1, i.e. src_1 & ~src_2
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, xor,    ^,  _mm512_xor_si512,    veorq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+
+// FN2/FN3 select their kernel by opname (the intrinsic argument is unused there); "or"/"union" are aliases for API consistency
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, or,    |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, union, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+
+// we duplicate the function because other containers use the "intersection" term, makes API more consistent
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, and,          &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, xor,    ^,  _mm512_xor_si512,    veorq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+
+// we duplicate the function because other containers use the "or" term, makes API more consistent
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, or,    |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, union, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+
+// we duplicate the function because other containers use the "intersection" term, makes API more consistent
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, and,          &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, xor,    ^,  _mm512_xor_si512,    veorq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+CROARING_TARGET_AVX512
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_REGION)
+CROARING_UNTARGET_REGION
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+
 #ifndef WORDS_IN_AVX2_REG
 #define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
 #endif // WORDS_IN_AVX2_REG
@@ -504,12 +697,16 @@ SCALAR_BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64)
 SCALAR_BITSET_CONTAINER_FN(xor,    ^,  _mm256_xor_si256,    veorq_u64)
 SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
 
+#if CROARING_COMPILER_SUPPORTS_AVX512
 
 #define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic)   \
   int bitset_container_##opname(const bitset_container_t *src_1,               \
                                 const bitset_container_t *src_2,               \
                                 bitset_container_t *dst) {                     \
-    if ( croaring_avx2() ) {                                                       \
+    if ( croaring_avx512() ) {                                                 \
+      return _avx512_bitset_container_##opname(src_1, src_2, dst);             \
+    }                                                                          \
+    else if ( croaring_avx2() ) {                                              \
       return _avx2_bitset_container_##opname(src_1, src_2, dst);               \
     } else {                                                                   \
       return _scalar_bitset_container_##opname(src_1, src_2, dst);             \
@@ -518,7 +715,10 @@ SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
   int bitset_container_##opname##_nocard(const bitset_container_t *src_1,      \
                                          const bitset_container_t *src_2,      \
                                          bitset_container_t *dst) {            \
-    if ( croaring_avx2() ) {                                                       \
+    if ( croaring_avx512() ) {                                                 \
+      return _avx512_bitset_container_##opname##_nocard(src_1, src_2, dst);    \
+    }                                                                          \
+    else if ( croaring_avx2() ) {                                              \
       return _avx2_bitset_container_##opname##_nocard(src_1, src_2, dst);      \
     } else {                                                                   \
       return _scalar_bitset_container_##opname##_nocard(src_1, src_2, dst);    \
@@ -526,7 +726,10 @@ SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
   }                                                                            \
   int bitset_container_##opname##_justcard(const bitset_container_t *src_1,    \
                                            const bitset_container_t *src_2) {  \
-    if ((croaring_detect_supported_architectures() & CROARING_AVX2) ==         \
+    if ( croaring_avx512() ) {                                                 \
+      return _avx512_bitset_container_##opname##_justcard(src_1, src_2);       \
+    }                                                                          \
+    else if ((croaring_detect_supported_architectures() & CROARING_AVX2) ==    \
         CROARING_AVX2) {                                                       \
       return _avx2_bitset_container_##opname##_justcard(src_1, src_2);         \
     } else {                                                                   \
@@ -534,7 +737,38 @@ SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
     }                                                                          \
   }
 
+#else // CROARING_COMPILER_SUPPORTS_AVX512
+
+
+#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic)   \
+  int bitset_container_##opname(const bitset_container_t *src_1,               \
+                                const bitset_container_t *src_2,               \
+                                bitset_container_t *dst) {                     \
+    if ( croaring_avx2() ) {                                                   \
+      return _avx2_bitset_container_##opname(src_1, src_2, dst);               \
+    } else {                                                                   \
+      return _scalar_bitset_container_##opname(src_1, src_2, dst);             \
+    }                                                                          \
+  }                                                                            \
+  int bitset_container_##opname##_nocard(const bitset_container_t *src_1,      \
+                                         const bitset_container_t *src_2,      \
+                                         bitset_container_t *dst) {            \
+    if ( croaring_avx2() ) {                                                   \
+      return _avx2_bitset_container_##opname##_nocard(src_1, src_2, dst);      \
+    } else {                                                                   \
+      return _scalar_bitset_container_##opname##_nocard(src_1, src_2, dst);    \
+    }                                                                          \
+  }                                                                            \
+  int bitset_container_##opname##_justcard(const bitset_container_t *src_1,    \
+                                           const bitset_container_t *src_2) {  \
+    if ( croaring_avx2() ) {                                                   \
+      return _avx2_bitset_container_##opname##_justcard(src_1, src_2);         \
+    } else {                                                                   \
+      return _scalar_bitset_container_##opname##_justcard(src_1, src_2);       \
+    }                                                                          \
+  }
 
+#endif //  CROARING_COMPILER_SUPPORTS_AVX512
 
 #elif defined(USENEON)
 
@@ -693,7 +927,13 @@ int bitset_container_to_uint32_array(
     uint32_t base
 ){
 #ifdef CROARING_IS_X64
-    if(( croaring_avx2() ) &&  (bc->cardinality >= 8192))  // heuristic
+#if CROARING_COMPILER_SUPPORTS_AVX512
+   if(( croaring_avx512() ) &&  (bc->cardinality >= 8192))  // heuristic
+		return (int) bitset_extract_setbits_avx512(bc->words,
+                BITSET_CONTAINER_SIZE_IN_WORDS, out, bc->cardinality, base);
+   else
+#endif
+   if(( croaring_avx2() ) &&  (bc->cardinality >= 8192))  // heuristic
 		return (int) bitset_extract_setbits_avx2(bc->words,
                 BITSET_CONTAINER_SIZE_IN_WORDS, out, bc->cardinality, base);
 	else
@@ -816,6 +1056,24 @@ bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, r
 }
 
 #ifdef CROARING_IS_X64
+#if CROARING_COMPILER_SUPPORTS_AVX512
+CROARING_TARGET_AVX512
+ALLOW_UNALIGNED
+static inline bool _avx512_bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) {
+  /* Compare the two word arrays 64 bytes at a time using unaligned loads. */
+  const __m512i *lhs = (const __m512i*)container1->words;
+  const __m512i *rhs = (const __m512i*)container2->words;
+  const size_t n_vectors = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t) / sizeof(__m512i);
+  for (size_t v = 0; v < n_vectors; v++) {
+      /* one mask bit per byte lane: all ones means the 64 bytes are equal */
+      __mmask64 same = _mm512_cmpeq_epi8_mask(_mm512_loadu_si512(lhs + v),
+                                              _mm512_loadu_si512(rhs + v));
+      if ((uint64_t)same != UINT64_MAX) { return false; }
+  }
+  return true;
+}
+CROARING_UNTARGET_REGION
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
 CROARING_TARGET_AVX2
 ALLOW_UNALIGNED
 static inline bool _avx2_bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) {
@@ -845,6 +1103,12 @@ bool bitset_container_equals(const bitset_container_t *container1, const bitset_
     }
   }
 #ifdef CROARING_IS_X64
+#if CROARING_COMPILER_SUPPORTS_AVX512
+  if( croaring_avx512() ) {
+    return _avx512_bitset_container_equals(container1, container2);
+  }
+  else
+#endif
   if( croaring_avx2() ) {
     return _avx2_bitset_container_equals(container1, container2);
   }
diff --git a/src/containers/convert.c b/src/containers/convert.c
index 300a1c0a8..4c9db7238 100644
--- a/src/containers/convert.c
+++ b/src/containers/convert.c
@@ -48,11 +48,27 @@ array_container_t *array_container_from_bitset(const bitset_container_t *bits) {
     array_container_t *result =
         array_container_create_given_capacity(bits->cardinality);
     result->cardinality = bits->cardinality;
-    //  sse version ends up being slower here
-    // (bitset_extract_setbits_sse_uint16)
-    // because of the sparsity of the data
-    bitset_extract_setbits_uint16(bits->words, BITSET_CONTAINER_SIZE_IN_WORDS,
+#if CROARING_IS_X64
+#if CROARING_COMPILER_SUPPORTS_AVX512
+    if( croaring_avx512() ) {
+        bitset_extract_setbits_avx512_uint16(bits->words, BITSET_CONTAINER_SIZE_IN_WORDS,
+                                  result->array, bits->cardinality , 0);
+    } else
+#endif
+    {
+        //  sse version ends up being slower here
+        // (bitset_extract_setbits_sse_uint16)
+        // because of the sparsity of the data
+        bitset_extract_setbits_uint16(bits->words, BITSET_CONTAINER_SIZE_IN_WORDS,
+                                  result->array, 0);
+    }
+#else
+        // If the system is not x64, then we have no accelerated function.
+        bitset_extract_setbits_uint16(bits->words, BITSET_CONTAINER_SIZE_IN_WORDS,
                                   result->array, 0);
+#endif
+
+
     return result;
 }
 
diff --git a/src/containers/run.c b/src/containers/run.c
index a7e4ab0b7..82e385542 100644
--- a/src/containers/run.c
+++ b/src/containers/run.c
@@ -831,7 +831,49 @@ int run_container_rank(const run_container_t *container, uint16_t x) {
     return sum;
 }
 
-#ifdef CROARING_IS_X64
+#ifdef CROARING_IS_X64
+#if CROARING_COMPILER_SUPPORTS_AVX512
+CROARING_TARGET_AVX512
+ALLOW_UNALIGNED
+/* Get the cardinality of `run'. Requires an actual computation. */
+static inline int _avx512_run_container_cardinality(const run_container_t *run) {
+    const int32_t n_runs = run->n_runs;
+    const rle16_t *runs = run->runs;
+
+    /* by initializing with n_runs, we omit counting the +1 for each pair. */
+    int sum = n_runs;
+    int32_t k = 0;
+    const int32_t step = sizeof(__m512i) / sizeof(rle16_t);
+    if (n_runs > step) {
+        __m512i total = _mm512_setzero_si512();
+        for (; k + step <= n_runs; k += step) {
+            __m512i ymm1 = _mm512_loadu_si512((const __m512i *)(runs + k));
+            __m512i justlengths = _mm512_srli_epi32(ymm1, 16);
+            total = _mm512_add_epi32(total, justlengths);
+        }
+
+        __m256i lo = _mm512_extracti32x8_epi32(total, 0);
+        __m256i hi = _mm512_extracti32x8_epi32(total, 1);
+
+        // a store might be faster than extract?
+        uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)];
+        _mm256_storeu_si256((__m256i *)buffer, lo);
+        sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) +
+               (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]);
+
+        _mm256_storeu_si256((__m256i *)buffer, hi);
+        sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) +
+               (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]);
+
+    }
+    for (; k < n_runs; ++k) {
+        sum += runs[k].length;
+    }
+
+    return sum;
+}
+CROARING_UNTARGET_REGION
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
 
 CROARING_TARGET_AVX2
 ALLOW_UNALIGNED
@@ -881,6 +923,12 @@ static inline int _scalar_run_container_cardinality(const run_container_t *run)
 }
 
 int run_container_cardinality(const run_container_t *run) {
+#if CROARING_COMPILER_SUPPORTS_AVX512
+  if( croaring_avx512() ) {
+    return _avx512_run_container_cardinality(run);
+  }
+  else
+#endif
   if( croaring_avx2() ) {
     return _avx2_run_container_cardinality(run);
   } else {
diff --git a/tools/cmake/FindOptions.cmake b/tools/cmake/FindOptions.cmake
index 49f797664..0ee4e3752 100644
--- a/tools/cmake/FindOptions.cmake
+++ b/tools/cmake/FindOptions.cmake
@@ -36,6 +36,14 @@ else()
 endif()
 endif()
 
+if(FORCE_AVX512) # some compilers like clang do not automagically define __AVX512__ even when the hardware supports it
+if(NOT MSVC)
+   set (OPT_FLAGS "${OPT_FLAGS} -mbmi2 -mavx512f -mavx512bw -mavx512dq -mavx512vbmi2 -mavx512bitalg -mavx512vpopcntdq")
+else()
+   set (OPT_FLAGS "${OPT_FLAGS} /arch:AVX512")
+endif()
+endif()
+
 if(NOT MSVC)
 set(STD_FLAGS "-std=c11 -fPIC")
 set(CXXSTD_FLAGS "-std=c++11 -fPIC")

From 13efaf0b28e885655aa4518d83c1b57911e24ec8 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Sun, 26 Mar 2023 07:25:03 -0400
Subject: [PATCH 096/162] feat: Conversion to conventional bitset (#448)

* Conversion to conventional bitset implemented.

* Minor cleaning.

* Minor fixes

* Adding bitset to amalgamation.

* Various fixes

* Adding C++ guards.

* Various fixes

* Make free functions safe for NULL ptr

* Various fixes

* Correcting typo in previous commit (duplicated line)

* Corrected new typo

* We renamed "hamming" to "roaring_hamming"

* We are adopting roaring_trailing_zeroes

* Let us modernize our Visual Studio 16 2019 builds.

* Switching to windows-2019

* Various fixes... to the AVX-512 kernel, which is unexpected.
---
 .github/workflows/vs16-arm-ci.yml       |  16 +-
 .github/workflows/vs16-ci.yml           |  42 ++-
 README.md                               |  71 ++++
 amalgamation.sh                         |  21 +-
 include/roaring/bitset/bitset.h         | 283 +++++++++++++++
 include/roaring/bitset_util.h           |   8 +-
 include/roaring/containers/containers.h | 111 +++---
 include/roaring/portability.h           |  57 ++-
 include/roaring/roaring.h               |  18 +
 src/CMakeLists.txt                      |   1 +
 src/bitset.c                            | 442 ++++++++++++++++++++++++
 src/bitset_util.c                       |  22 +-
 src/containers/bitset.c                 |  90 ++---
 src/containers/containers.c             |   8 +-
 src/containers/convert.c                |   6 +-
 src/containers/mixed_equal.c            |   2 +-
 src/containers/mixed_subset.c           |   2 +-
 src/roaring.c                           |  68 +++-
 tests/CMakeLists.txt                    |   1 +
 tests/bitset_container_unit.c           |  71 +++-
 tests/cbitset_unit.c                    | 277 +++++++++++++++
 tests/container_comparison_unit.c       |   4 +-
 tests/toplevel_unit.c                   |  26 ++
 23 files changed, 1444 insertions(+), 203 deletions(-)
 create mode 100644 include/roaring/bitset/bitset.h
 create mode 100644 src/bitset.c
 create mode 100644 tests/cbitset_unit.c

diff --git a/.github/workflows/vs16-arm-ci.yml b/.github/workflows/vs16-arm-ci.yml
index 7af16c77e..95c783242 100644
--- a/.github/workflows/vs16-arm-ci.yml
+++ b/.github/workflows/vs16-arm-ci.yml
@@ -8,17 +8,21 @@ permissions:
 jobs:
   ci:
     name: windows-vs16
-    runs-on: windows-latest
+    runs-on: windows-2019
     strategy:
       fail-fast: false
       matrix:
         include:
-          - {arch: ARM}
-          - {arch: ARM64}
+          - {gen: Visual Studio 16 2019, arch: ARM}
+          - {gen: Visual Studio 16 2019, arch: ARM64}
     steps:
       - name: checkout
         uses: actions/checkout@v2
-      - name: Use cmake
+      - name: Configure
         run: |
-          cmake -A ${{ matrix.arch }} -DCMAKE_CROSSCOMPILING=1 -B build  &&
-          cmake --build build --verbose
+          mkdir build
+          cd build && cmake -G "${{matrix.gen}}" -A ${{matrix.arch}}  ..
+      - name: Build
+        run: cmake --build build --config Release
+      - name: Build Debug
+        run: cmake --build build --config Debug
\ No newline at end of file
diff --git a/.github/workflows/vs16-ci.yml b/.github/workflows/vs16-ci.yml
index 3d52fc298..dfba9833f 100644
--- a/.github/workflows/vs16-ci.yml
+++ b/.github/workflows/vs16-ci.yml
@@ -8,21 +8,29 @@ permissions:
 jobs:
   ci:
     name: windows-vs16
-    runs-on: windows-latest
+    runs-on: windows-2019
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - {gen: Visual Studio 16 2019, arch: Win32}
+          - {gen: Visual Studio 16 2019, arch: x64}
     steps:
-    - uses: actions/checkout@v2
-    - name: 'Run CMake with VS16'
-      uses: lukka/run-cmake@v2
-      with:
-        cmakeListsOrSettingsJson: CMakeListsTxtAdvanced
-        cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt'
-        buildDirectory: "${{ github.workspace }}/../../_temp/windows"
-        cmakeBuildType: Release
-        buildWithCMake: true
-        cmakeGenerator: VS16Win64
-        cmakeAppendedArgs: -DROARING_BUILD_STATIC=ON
-        buildWithCMakeArgs: --config Release
-
-    - name: 'Run CTest'
-      run: ctest  --verbose
-      working-directory: "${{ github.workspace }}/../../_temp/windows"
+      - name: checkout
+        uses: actions/checkout@v2
+      - name: Configure
+        run: |
+          mkdir build
+          cd build && cmake -G "${{matrix.gen}}" -A ${{matrix.arch}}  ..
+      - name: Build
+        run: cmake --build build --config Release
+      - name: Run basic tests
+        run: |
+          cd build
+          ctest -C Release --output-on-failure
+      - name: Build Debug
+        run: cmake --build build --config Debug
+      - name: Run basic tests in Debug
+        run: |
+          cd build
+          ctest -C Debug --output-on-failure
\ No newline at end of file
diff --git a/README.md b/README.md
index d674e8f0e..cdf19907f 100644
--- a/README.md
+++ b/README.md
@@ -349,6 +349,77 @@ int main() {
 }
 ```
 
+# Conventional bitsets (C)
+
+We support conventional bitsets (uncompressed) as part of the library.
+
+Simple example:
+
+```C
+bitset_t * b = bitset_create();
+bitset_set(b,10);
+bitset_get(b,10);// returns true
+bitset_free(b); // frees memory
+```
+
+More advanced example:
+
+```C
+    bitset_t *b = bitset_create();
+    for (int k = 0; k < 1000; ++k) {
+        bitset_set(b, 3 * k);
+    }
+    // We have bitset_count(b) == 1000.
+    // We have bitset_get(b, 3) is true
+    // You can iterate through the values:
+    size_t k = 0;
+    for (size_t i = 0; bitset_next_set_bit(b, &i); i++) {
+        // You will have i == k
+        k += 3;
+    }
+    // We support a wide range of operations on two bitsets such as
+    // bitset_inplace_symmetric_difference(b1,b2);
+    // bitset_inplace_symmetric_difference(b1,b2);
+    // bitset_inplace_difference(b1,b2);// should make no difference
+    // bitset_inplace_union(b1,b2);
+    // bitset_inplace_intersection(b1,b2);
+    // bitsets_disjoint
+    // bitsets_intersect
+```
+
+In some instances, you may want to convert a Roaring bitmap into a conventional (uncompressed) bitset.
+Indeed, bitsets have advantages such as higher query performance in some cases. The following code
+illustrates how you may do so:
+
+```C
+    roaring_bitmap_t *r1 = roaring_bitmap_create();
+    for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) {
+     roaring_bitmap_add(r1, i);
+    }
+    for (uint32_t i = 100000; i < 500000; i+= 100) {
+     roaring_bitmap_add(r1, i);
+    }
+    roaring_bitmap_add_range(r1, 500000, 600000);
+    bitset_t * bitset = bitset_create();
+    bool success = roaring_bitmap_to_bitset(r1, bitset);
+    assert(success); // could fail due to memory allocation.
+    assert(bitset_count(bitset) == roaring_bitmap_get_cardinality(r1));
+    // You can then query the bitset:
+    for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) {
+        assert(bitset_get(bitset,i));
+    }
+    for (uint32_t i = 100000; i < 500000; i+= 100) {
+        assert(bitset_get(bitset,i));
+    }
+    // you must free the memory:
+    bitset_free(bitset);
+    roaring_bitmap_free(r1);
+```
+
+You should be aware that a conventional bitset (`bitset_t *`) may use much more
+memory than a Roaring bitmap in some cases. You should run benchmarks to determine
+whether the conversion to a bitset has performance benefits in your case.
+
 # Example (C++)
 
 
diff --git a/amalgamation.sh b/amalgamation.sh
index 8cbfba71b..161582e7a 100755
--- a/amalgamation.sh
+++ b/amalgamation.sh
@@ -31,6 +31,8 @@ DEMOCPP="amalgamation_demo.cpp"
 ALL_PUBLIC_H="
 $SCRIPTPATH/include/roaring/roaring_version.h
 $SCRIPTPATH/include/roaring/roaring_types.h
+$SCRIPTPATH/include/roaring/portability.h
+$SCRIPTPATH/include/roaring/bitset/bitset.h
 $SCRIPTPATH/include/roaring/roaring.h
 $SCRIPTPATH/include/roaring/memory.h
 "
@@ -47,7 +49,6 @@ $SCRIPTPATH/cpp/roaring64map.hh
 # need to be in this order.
 #
 ALL_PRIVATE_H="
-$SCRIPTPATH/include/roaring/portability.h
 $SCRIPTPATH/include/roaring/isadetection.h
 $SCRIPTPATH/include/roaring/containers/perfparameters.h
 $SCRIPTPATH/include/roaring/containers/container_defs.h
@@ -165,13 +166,21 @@ echo "Creating ${DEMOC}..."
 
     cat <<< '
 #include <stdio.h>
+#include <stdlib.h>
 #include "roaring.c"
 int main() {
   roaring_bitmap_t *r1 = roaring_bitmap_create();
   for (uint32_t i = 100; i < 1000; i++) roaring_bitmap_add(r1, i);
   printf("cardinality = %d\n", (int) roaring_bitmap_get_cardinality(r1));
   roaring_bitmap_free(r1);
-  return 0;
+
+  bitset_t *b = bitset_create();
+  for (int k = 0; k < 1000; ++k) {
+        bitset_set(b, 3 * k);
+  }
+  printf("%zu \n", bitset_count(b));
+  bitset_free(b);
+  return EXIT_SUCCESS;
 }
 '
 } > "${DEMOC}"
@@ -241,10 +250,10 @@ CPPBIN=${DEMOCPP%%.*}
 echo "The interface is found in the file 'include/roaring/roaring.h'."
 newline
 echo "For C, try:"
-echo "cc -march=native -O3 -std=c11  -o ${CBIN} ${DEMOC}  && ./${CBIN} "
+echo "cc -O3 -std=c11  -o ${CBIN} ${DEMOC}  && ./${CBIN} "
 newline
 echo "For C++, try:"
-echo "c++ -march=native -O3 -std=c++11 -o ${CPPBIN} ${DEMOCPP}  && ./${CPPBIN} "
+echo "c++ -O3 -std=c++11 -o ${CPPBIN} ${DEMOCPP}  && ./${CPPBIN} "
 
 lowercase(){
     echo "$1" | tr 'A-Z' 'a-z'
@@ -256,8 +265,8 @@ newline
 echo "You can build a shared library with the following command:"
 
 if [ $OS == "darwin" ]; then
-  echo "cc -march=native -O3 -std=c11 -shared -o libroaring.dylib -fPIC roaring.c"
+  echo "cc  -O3 -std=c11 -shared -o libroaring.dylib -fPIC roaring.c"
 else
-  echo "cc -march=native -O3 -std=c11 -shared -o libroaring.so -fPIC roaring.c"
+  echo "cc -O3 -std=c11 -shared -o libroaring.so -fPIC roaring.c"
 fi
 
diff --git a/include/roaring/bitset/bitset.h b/include/roaring/bitset/bitset.h
new file mode 100644
index 000000000..f9707b351
--- /dev/null
+++ b/include/roaring/bitset/bitset.h
@@ -0,0 +1,283 @@
+#ifndef CBITSET_BITSET_H
+#define CBITSET_BITSET_H
+
+// For compatibility with MSVC with the use of `restrict`
+#if (__STDC_VERSION__ >= 199901L) || \
+    (defined(__GNUC__) && defined(__STDC_VERSION__))
+#define CBITSET_RESTRICT restrict
+#else
+#define CBITSET_RESTRICT
+#endif  // (__STDC_VERSION__ >= 199901L) || (defined(__GNUC__) &&
+        // defined(__STDC_VERSION__ ))
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <roaring/portability.h>
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace api {
+#endif
+
+struct bitset_s {
+    uint64_t *CBITSET_RESTRICT array;  /* buffer of 64-bit words holding the bits */
+    /* For simplicity and performance, we prefer to have a size and a capacity that is a multiple of 64 bits.
+     * Thus we only track the size and the capacity in terms of 64-bit words allocated */
+    size_t arraysize;  /* size, in 64-bit words (bit i lives in array[i / 64]) */
+    size_t capacity;   /* capacity, in 64-bit words */
+};
+
+typedef struct bitset_s bitset_t;
+
+/* Create a new bitset. Return NULL in case of failure. */
+bitset_t *bitset_create(void);
+
+/* Create a new bitset able to contain size bits. Return NULL in case of
+ * failure. */
+bitset_t *bitset_create_with_capacity(size_t size);
+
+/* Free memory. */
+void bitset_free(bitset_t *bitset);
+
+/* Set all bits to zero. */
+void bitset_clear(bitset_t *bitset);
+
+/* Set all bits to one. */
+void bitset_fill(bitset_t *bitset);
+
+/* Create a copy */
+bitset_t *bitset_copy(const bitset_t *bitset);
+
+/* For advanced users: Resize the bitset so that it can support newarraysize * 64 bits.
+ * Return true in case of success, false for failure. Pad
+ * with zeroes new buffer areas if requested. */
+bool bitset_resize(bitset_t *bitset, size_t newarraysize, bool padwithzeroes);
+
+/* Returns arraysize * sizeof(uint64_t): the size in bytes of the arraysize words of the buffer. */
+static inline size_t bitset_size_in_bytes(const bitset_t *bitset) {
+    return bitset->arraysize * sizeof(uint64_t);
+}
+
+/* Returns arraysize * 64: how many bit positions can be accessed. */
+static inline size_t bitset_size_in_bits(const bitset_t *bitset) {
+    return bitset->arraysize * 64;
+}
+
+/* Returns arraysize: the size of the bitset counted in 64-bit words. */
+static inline size_t bitset_size_in_words(const bitset_t *bitset) {
+    return bitset->arraysize;
+}
+
+/* For advanced users: Grow the bitset so that it can support newarraysize * 64 bits with padding.
+ * Return true in case of success, false for failure. */
+bool bitset_grow(bitset_t *bitset, size_t newarraysize);
+
+/* attempts to recover unused memory, return false in case of reallocation
+ * failure */
+bool bitset_trim(bitset_t *bitset);
+
+/* shifts all bits by 's' positions so that the bitset representing values
+ * 1,2,10 would represent values 1+s, 2+s, 10+s */
+void bitset_shift_left(bitset_t *bitset, size_t s);
+
+/* shifts all bits by 's' positions so that the bitset representing values
+ * 1,2,10 would represent values 1-s, 2-s, 10-s, negative values are deleted */
+void bitset_shift_right(bitset_t *bitset, size_t s);
+
+/* Set the ith bit. If i is beyond the current size, first grows the bitset with
+ * bitset_grow; when that fails the call returns without setting the bit. */
+static inline void bitset_set(bitset_t *bitset, size_t i) {
+    size_t shiftedi = i / 64;  // index of the word holding bit i
+    if (shiftedi >= bitset->arraysize) {
+        if (!bitset_grow(bitset, shiftedi + 1)) {
+            return;
+        }
+    }
+    bitset->array[shiftedi] |= ((uint64_t)1) << (i % 64);
+}
+
+/* Set the ith bit to the specified value. If i is beyond the current size, first grows
+ * the bitset with bitset_grow; when that fails the call returns without writing the bit. */
+static inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag) {
+    size_t shiftedi = i / 64;  // index of the word holding bit i
+    uint64_t mask = ((uint64_t)1) << (i % 64);  // selects bit i within its word
+    uint64_t dynmask = ((uint64_t)flag) << (i % 64);  // flag moved to that same position
+    if (shiftedi >= bitset->arraysize) {
+        if (!bitset_grow(bitset, shiftedi + 1)) {
+            return;
+        }
+    }
+    uint64_t w = bitset->array[shiftedi];
+    w &= ~mask;  // clear the bit ...
+    w |= dynmask;  // ... then set it again only when flag is true
+    bitset->array[shiftedi] = w;
+}
+
+/* Get the value of the ith bit. Positions at or beyond arraysize * 64 read as false. */
+static inline bool bitset_get(const bitset_t *bitset, size_t i) {
+    size_t shiftedi = i / 64;  // index of the word holding bit i
+    if (shiftedi >= bitset->arraysize) {
+        return false;
+    }
+    return (bitset->array[shiftedi] & (((uint64_t)1) << (i % 64))) != 0;
+}
+
+/* Count number of bits set.  */
+size_t bitset_count(const bitset_t *bitset);
+
+/* Find the index of the first bit set. Or zero if the bitset is empty.  */
+size_t bitset_minimum(const bitset_t *bitset);
+
+/* Find the index of the last bit set. Or zero if the bitset is empty.  */
+size_t bitset_maximum(const bitset_t *bitset);
+
+/* compute the union in-place (to b1), returns true if successful, to generate a
+ * new bitset first call bitset_copy */
+bool bitset_inplace_union(bitset_t *CBITSET_RESTRICT b1,
+                          const bitset_t *CBITSET_RESTRICT b2);
+
+/* report the size of the union (without materializing it) */
+size_t bitset_union_count(const bitset_t *CBITSET_RESTRICT b1,
+                          const bitset_t *CBITSET_RESTRICT b2);
+
+/* compute the intersection in-place (to b1), to generate a new bitset first
+ * call bitset_copy */
+void bitset_inplace_intersection(bitset_t *CBITSET_RESTRICT b1,
+                                 const bitset_t *CBITSET_RESTRICT b2);
+
+/* report the size of the intersection (without materializing it) */
+size_t bitset_intersection_count(const bitset_t *CBITSET_RESTRICT b1,
+                                 const bitset_t *CBITSET_RESTRICT b2);
+
+/* returns true if the bitsets contain no common elements */
+bool bitsets_disjoint(const bitset_t *CBITSET_RESTRICT b1, const bitset_t *CBITSET_RESTRICT b2);
+
+/* returns true if the bitsets contain any common elements */
+bool bitsets_intersect(const bitset_t *CBITSET_RESTRICT b1, const bitset_t *CBITSET_RESTRICT b2);
+
+/* returns true if b1 contains all of the set bits of b2 */
+bool bitset_contains_all(const bitset_t *CBITSET_RESTRICT b1, const bitset_t *CBITSET_RESTRICT b2);
+
+/* compute the difference in-place (to b1), to generate a new bitset first call
+ * bitset_copy */
+void bitset_inplace_difference(bitset_t *CBITSET_RESTRICT b1,
+                               const bitset_t *CBITSET_RESTRICT b2);
+
+/* compute the size of the difference */
+size_t bitset_difference_count(const bitset_t *CBITSET_RESTRICT b1,
+                               const bitset_t *CBITSET_RESTRICT b2);
+
+/* compute the symmetric difference in-place (to b1), return true if successful,
+ * to generate a new bitset first call bitset_copy */
+bool bitset_inplace_symmetric_difference(bitset_t *CBITSET_RESTRICT b1,
+                                         const bitset_t *CBITSET_RESTRICT b2);
+
+/* compute the size of the symmetric difference  */
+size_t bitset_symmetric_difference_count(const bitset_t *CBITSET_RESTRICT b1,
+                                         const bitset_t *CBITSET_RESTRICT b2);
+
+/* Advance *i to the first set bit at index >= *i and return true, or return
+ * false (leaving *i unchanged) when there is no such bit. Iterate like so :
+  for(size_t i = 0; bitset_next_set_bit(b,&i) ; i++) {
+    //.....
+  }
+  */
+static inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i) {
+    size_t x = *i / 64;
+    if (x >= bitset->arraysize) {
+        return false;
+    }
+    uint64_t w = bitset->array[x];
+    w >>= (*i & 63);  // discard the bits below *i in the current word
+    if (w != 0) {
+        *i += roaring_trailing_zeroes(w);  // distance from *i to the next set bit
+        return true;
+    }
+    x++;
+    while (x < bitset->arraysize) {  // scan the following words for a non-zero one
+        w = bitset->array[x];
+        if (w != 0) {
+            *i = x * 64 + roaring_trailing_zeroes(w);
+            return true;
+        }
+        x++;
+    }
+    return false;
+}
+
+/* Store up to `capacity` set-bit indices >= *startfrom in buffer and return the count;
+ * if any were stored, *startfrom becomes the last one. Iterate like so :
+   size_t buffer[256];
+   size_t howmany = 0;
+  for(size_t startfrom = 0; (howmany = bitset_next_set_bits(b,buffer,256, &startfrom)) >
+ 0 ; startfrom++) {
+    //.....
+  }
+  */
+static inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer,
+                                 size_t capacity, size_t *startfrom) {
+    if (capacity == 0) return 0;  // sanity check
+    size_t x = *startfrom / 64;
+    if (x >= bitset->arraysize) {
+        return 0;  // nothing more to iterate over
+    }
+    uint64_t w = bitset->array[x];
+    w &= ~((((uint64_t)1) << (*startfrom & 63)) - 1);  // mask (not shift) bits below *startfrom so r + base stays exact
+    size_t howmany = 0;
+    size_t base = x << 6;
+    while (howmany < capacity) {
+        while (w != 0) {
+            uint64_t t = w & (~w + 1);  // isolate the lowest set bit
+            int r = roaring_trailing_zeroes(w);
+            buffer[howmany++] = r + base;
+            if (howmany == capacity) goto end;
+            w ^= t;  // clear it and continue with the next set bit
+        }
+        x += 1;
+        if (x == bitset->arraysize) {
+            break;
+        }
+        base += 64;
+        w = bitset->array[x];
+    }
+end:
+    if (howmany > 0) {
+        *startfrom = buffer[howmany - 1];
+    }
+    return howmany;
+}
+
+typedef bool (*bitset_iterator)(size_t value, void *param);
+
+// Calls iterator(index, ptr) for each set bit, lowest index first; stops and returns false as soon as a call returns false, otherwise returns true.
+static inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator,
+                                   void *ptr) {
+    size_t base = 0;  // index of bit 0 of the current word
+    for (size_t i = 0; i < b->arraysize; ++i) {
+        uint64_t w = b->array[i];
+        while (w != 0) {
+            uint64_t t = w & (~w + 1);  // isolate the lowest set bit
+            int r = roaring_trailing_zeroes(w);
+            if (!iterator(r + base, ptr)) return false;
+            w ^= t;  // clear it and continue with the next set bit
+        }
+        base += 64;
+    }
+    return true;
+}
+
+static inline void bitset_print(const bitset_t *b) {  // prints the set indices to stdout inside braces, no newline
+    printf("{");
+    for (size_t i = 0; bitset_next_set_bit(b, &i); i++) {
+        printf("%zu, ", i);  // every index, including the last, is followed by ", "
+    }
+    printf("}");
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace api {
+#endif
+
+#endif
diff --git a/include/roaring/bitset_util.h b/include/roaring/bitset_util.h
index 9da26f463..650f2566a 100644
--- a/include/roaring/bitset_util.h
+++ b/include/roaring/bitset_util.h
@@ -40,16 +40,16 @@ static inline int bitset_lenrange_cardinality(const uint64_t *words,
     uint32_t firstword = start / 64;
     uint32_t endword = (start + lenminusone) / 64;
     if (firstword == endword) {
-        return hamming(words[firstword] &
+        return roaring_hamming(words[firstword] &
                        ((~UINT64_C(0)) >> ((63 - lenminusone) % 64))
                            << (start % 64));
     }
-    int answer = hamming(words[firstword] & ((~UINT64_C(0)) << (start % 64)));
+    int answer = roaring_hamming(words[firstword] & ((~UINT64_C(0)) << (start % 64)));
     for (uint32_t i = firstword + 1; i < endword; i++) {
-        answer += hamming(words[i]);
+        answer += roaring_hamming(words[i]);
     }
     answer +=
-        hamming(words[endword] &
+        roaring_hamming(words[endword] &
                 (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64));
     return answer;
 }
diff --git a/include/roaring/containers/containers.h b/include/roaring/containers/containers.h
index 47001da0b..b1391b529 100644
--- a/include/roaring/containers/containers.h
+++ b/include/roaring/containers/containers.h
@@ -172,9 +172,10 @@ static inline bitset_container_t *container_to_bitset(
             return result;
         case SHARED_CONTAINER_TYPE:
             assert(false);
+            roaring_unreachable;
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return 0;  // unreached
 }
 
@@ -194,7 +195,7 @@ static inline bitset_container_t *container_to_bitset(
             return container_names[3];
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return "unknown";
     }
 }*/
@@ -219,16 +220,16 @@ static inline const char *get_full_container_name(
                     return shared_container_names[2];
                 default:
                     assert(false);
-                    __builtin_unreachable();
+                    roaring_unreachable;
                     return "unknown";
             }
             break;
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return "unknown";
     }
-    __builtin_unreachable();
+    roaring_unreachable;
     return NULL;
 }
 
@@ -248,7 +249,7 @@ static inline int container_get_cardinality(
             return run_container_cardinality(const_CAST_run(c));
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return 0;  // unreached
 }
 
@@ -270,7 +271,7 @@ static inline bool container_is_full(const container_t *c, uint8_t typecode) {
             return run_container_is_full(const_CAST_run(c));
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return 0;  // unreached
 }
 
@@ -287,7 +288,7 @@ static inline int container_shrink_to_fit(
             return run_container_shrink_to_fit(CAST_run(c));
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return 0;  // unreached
 }
 
@@ -371,7 +372,7 @@ static inline container_t *container_repair_after_lazy(
             assert(false);
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return 0;  // unreached
 }
 
@@ -397,7 +398,7 @@ static inline int32_t container_write(
             return run_container_write(const_CAST_run(c), buf);
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return 0;  // unreached
 }
 
@@ -419,7 +420,7 @@ static inline int32_t container_size_in_bytes(
             return run_container_size_in_bytes(const_CAST_run(c));
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return 0;  // unreached
 }
 
@@ -452,7 +453,7 @@ static inline bool container_nonzero_cardinality(
             return run_container_nonzero_cardinality(const_CAST_run(c));
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return 0;  // unreached
 }
 
@@ -484,7 +485,7 @@ static inline int container_to_uint32_array(
                             output, const_CAST_run(c), base);
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return 0;  // unreached
 }
 
@@ -524,7 +525,7 @@ static inline container_t *container_add(
             return c;
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return NULL;
     }
 }
@@ -564,7 +565,7 @@ static inline container_t *container_remove(
             return c;
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return NULL;
     }
 }
@@ -587,7 +588,7 @@ static inline bool container_contains(
             return run_container_contains(const_CAST_run(c), val);
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return false;
     }
 }
@@ -614,7 +615,7 @@ static inline bool container_contains_range(
                                                     range_start, range_end);
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return false;
     }
 }
@@ -670,7 +671,7 @@ static inline bool container_equals(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return false;
     }
 }
@@ -723,7 +724,7 @@ static inline bool container_is_subset(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return false;
     }
 }
@@ -818,7 +819,7 @@ static inline container_t *container_and(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return NULL;
     }
 }
@@ -871,7 +872,7 @@ static inline int container_and_cardinality(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return 0;
     }
 }
@@ -924,7 +925,7 @@ static inline bool container_intersect(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return 0;
     }
 }
@@ -1022,7 +1023,7 @@ static inline container_t *container_iand(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return NULL;
     }
 }
@@ -1134,7 +1135,7 @@ static inline container_t *container_or(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return NULL;  // unreached
     }
 }
@@ -1249,7 +1250,7 @@ static inline container_t *container_lazy_or(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return NULL;  // unreached
     }
 }
@@ -1361,7 +1362,7 @@ static inline container_t *container_ior(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return NULL;
     }
 }
@@ -1492,7 +1493,7 @@ static inline container_t *container_lazy_ior(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return NULL;
     }
 }
@@ -1579,7 +1580,7 @@ static inline container_t* container_xor(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return NULL;  // unreached
     }
 }
@@ -1616,7 +1617,7 @@ static inline void container_add_offset(const container_t *c, uint8_t type,
         break;
     default:
         assert(false);
-        __builtin_unreachable();
+        roaring_unreachable;
         break;
     }
 }
@@ -1715,7 +1716,7 @@ static inline container_t *container_lazy_xor(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return NULL;  // unreached
     }
 }
@@ -1799,7 +1800,7 @@ static inline container_t *container_ixor(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return NULL;
     }
 }
@@ -1946,7 +1947,7 @@ static inline container_t *container_andnot(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return NULL;  // unreached
     }
 }
@@ -2032,7 +2033,7 @@ static inline container_t *container_iandnot(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return NULL;
     }
 }
@@ -2060,10 +2061,10 @@ static inline bool container_iterate(
                                          base, iterator, ptr);
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return false;
 }
 
@@ -2086,10 +2087,10 @@ static inline bool container_iterate64(
                                            iterator, high_bits, ptr);
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return false;
 }
 
@@ -2119,10 +2120,10 @@ static inline container_t *container_not(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return NULL;
 }
 
@@ -2155,10 +2156,10 @@ static inline container_t *container_not_range(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return NULL;
 }
 
@@ -2190,10 +2191,10 @@ static inline container_t *container_inot(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return NULL;
 }
 
@@ -2226,10 +2227,10 @@ static inline container_t *container_inot_range(
 
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return NULL;
 }
 
@@ -2259,10 +2260,10 @@ static inline bool container_select(
                                         start_rank, rank, element);
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return false;
 }
 
@@ -2279,10 +2280,10 @@ static inline uint16_t container_maximum(
             return run_container_maximum(const_CAST_run(c));
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return false;
 }
 
@@ -2299,10 +2300,10 @@ static inline uint16_t container_minimum(
             return run_container_minimum(const_CAST_run(c));
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return false;
 }
 
@@ -2321,10 +2322,10 @@ static inline int container_rank(
             return run_container_rank(const_CAST_run(c), x);
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
     return false;
 }
 
@@ -2402,7 +2403,7 @@ static inline container_t *container_add_range(
             }
         }
         default:
-            __builtin_unreachable();
+            roaring_unreachable;
     }
 }
 
@@ -2472,7 +2473,7 @@ static inline container_t *container_remove_range(
             return convert_run_to_efficient_container(run, result_type);
         }
         default:
-            __builtin_unreachable();
+            roaring_unreachable;
      }
 }
 
diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index e650466fc..e973e6cea 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -71,7 +71,9 @@ extern "C" {  // portability definitions are in global scope, not a namespace
 #endif
 
 #if CROARING_REGULAR_VISUAL_STUDIO
+#ifndef __restrict__
 #define __restrict__ __restrict
+#endif // __restrict__
 #endif // CROARING_REGULAR_VISUAL_STUDIO
 
 
@@ -147,10 +149,10 @@ extern "C" {  // portability definitions are in global scope, not a namespace
 #endif // CROARING_REGULAR_VISUAL_STUDIO
 #endif // defined(__x86_64__) || defined(_M_X64)
 
-#if !defined(USENEON) && !defined(DISABLENEON) && defined(__ARM_NEON)
-#  define USENEON
+#if !defined(CROARING_USENEON) && !defined(DISABLENEON) && defined(__ARM_NEON)
+#  define CROARING_USENEON
 #endif
-#if defined(USENEON)
+#if defined(CROARING_USENEON)
 #  include <arm_neon.h>
 #endif
 
@@ -166,12 +168,13 @@ extern "C" {  // portability definitions are in global scope, not a namespace
 
 #ifndef __clang__  // if one compiles with MSVC *with* clang, then these
                    // intrinsics are defined!!!
+#define CROARING_INTRINSICS 1
 // sadly there is no way to check whether we are missing these intrinsics
 // specifically.
 
-/* wrappers for Visual Studio built-ins that look like gcc built-ins */
+/* wrappers for Visual Studio built-ins that look like gcc built-ins __builtin_ctzll */
 /* result might be undefined when input_num is zero */
-inline int __builtin_ctzll(unsigned long long input_num) {
+static inline int roaring_trailing_zeroes(unsigned long long input_num) {
     unsigned long index;
 #ifdef _WIN64  // highly recommended!!!
     _BitScanForward64(&index, input_num);
@@ -182,12 +185,13 @@ inline int __builtin_ctzll(unsigned long long input_num) {
         _BitScanForward(&index, (uint32_t)(input_num >> 32));
         index += 32;
     }
-#endif
+#endif // _WIN64
     return index;
 }
 
+/* wrappers for Visual Studio built-ins that look like gcc built-ins __builtin_clzll */
 /* result might be undefined when input_num is zero */
-inline int __builtin_clzll(unsigned long long input_num) {
+inline int roaring_leading_zeroes(unsigned long long input_num) {
     unsigned long index;
 #ifdef _WIN64  // highly recommended!!!
     _BitScanReverse64(&index, input_num);
@@ -198,28 +202,21 @@ inline int __builtin_clzll(unsigned long long input_num) {
     } else {
         _BitScanReverse(&index, (uint32_t)(input_num));
     }
-#endif
+#endif // _WIN64
     return 63 - index;
 }
 
-
-/* software implementation avoids POPCNT */
-/*static inline int __builtin_popcountll(unsigned long long input_num) {
-  const uint64_t m1 = 0x5555555555555555; //binary: 0101...
-  const uint64_t m2 = 0x3333333333333333; //binary: 00110011..
-  const uint64_t m4 = 0x0f0f0f0f0f0f0f0f; //binary:  4 zeros,  4 ones ...
-  const uint64_t h01 = 0x0101010101010101; //the sum of 256 to the power of 0,1,2,3...
-
-  input_num -= (input_num >> 1) & m1;
-  input_num = (input_num & m2) + ((input_num >> 2) & m2);
-  input_num = (input_num + (input_num >> 4)) & m4;
-  return (input_num * h01) >> 56;
-}*/
-
 /* Use #define so this is effective even under /Ob0 (no inline) */
-#define __builtin_unreachable() __assume(0)
-#endif
+#define roaring_unreachable __assume(0)
+#endif // __clang__
+
+#endif // CROARING_REGULAR_VISUAL_STUDIO
 
+#ifndef CROARING_INTRINSICS  // no wrappers were defined above: forward to the __builtin_* functions
+#define CROARING_INTRINSICS 1
+#define roaring_unreachable __builtin_unreachable()
+static inline int roaring_trailing_zeroes(unsigned long long input_num) { return __builtin_ctzll(input_num); }
+static inline int roaring_leading_zeroes(unsigned long long input_num) { return __builtin_clzll(input_num); }
 #endif
 
 #if CROARING_REGULAR_VISUAL_STUDIO
@@ -239,11 +236,11 @@ inline int __builtin_clzll(unsigned long long input_num) {
 
 #define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100)
 
-#ifdef USENEON
+#ifdef CROARING_USENEON
 // we can always compute the popcount fast.
 #elif (defined(_M_ARM) || defined(_M_ARM64)) && ((defined(_WIN64) || defined(_WIN32)) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO)
 // we will need this function:
-static inline int hammingbackup(uint64_t x) {
+static inline int roaring_hamming_backup(uint64_t x) {
   uint64_t c1 = UINT64_C(0x5555555555555555);
   uint64_t c2 = UINT64_C(0x3333333333333333);
   uint64_t c4 = UINT64_C(0x0F0F0F0F0F0F0F0F);
@@ -255,19 +252,19 @@ static inline int hammingbackup(uint64_t x) {
 #endif
 
 
-static inline int hamming(uint64_t x) {
+static inline int roaring_hamming(uint64_t x) {
 #if defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO
-#ifdef USENEON
+#ifdef CROARING_USENEON
    return vaddv_u8(vcnt_u8(vcreate_u8(input_num)));
 #elif defined(_M_ARM64)
-  return hammingbackup(x);
+  return roaring_hamming_backup(x);
   // (int) _CountOneBits64(x); is unavailable
 #else  // _M_ARM64
   return (int) __popcnt64(x);
 #endif // _M_ARM64
 #elif defined(_WIN32) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO
 #ifdef _M_ARM
-  return hammingbackup(x);
+  return roaring_hamming_backup(x);
   // _CountOneBits is unavailable
 #else // _M_ARM
     return (int) __popcnt(( unsigned int)x) + (int)  __popcnt(( unsigned int)(x>>32));
diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h
index 04291c4c0..801825592 100644
--- a/include/roaring/roaring.h
+++ b/include/roaring/roaring.h
@@ -12,6 +12,7 @@
 #include <roaring/memory.h>
 #include <roaring/roaring_types.h>
 #include <roaring/roaring_version.h>
+#include <roaring/bitset/bitset.h>
 
 #ifdef __cplusplus
 extern "C" { namespace roaring { namespace api {
@@ -435,6 +436,23 @@ void roaring_bitmap_clear(roaring_bitmap_t *r);
  */
 void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans);
 
+/**
+ * Store the bitmap to a bitset. This can be useful for people
+ * who need the performance and simplicity of a standard bitset.
+ * We assume that the input bitset is originally empty (does not
+ * have any set bit).
+ *
+ *   bitset_t * out = bitset_create();
+ *   // if the bitset has content in it, call "bitset_clear(out)"
+ *   bool success = roaring_bitmap_to_bitset(mybitmap, out); 
+ *   // on failure, success will be false.
+ *   // You can then query the bitset:
+ *   bool is_present = bitset_get(out,  10011 );
+ *   // you must free the memory:
+ *   bitset_free(out);
+ *
+ */
+bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t * bitset);
 
 /**
  * Convert the bitmap to a sorted array from `offset` by `limit`, output in `ans`.
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index e68af7957..7b3e0d02a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -17,6 +17,7 @@ MESSAGE( STATUS "ROARING_LIB_TYPE: " ${ROARING_LIB_TYPE})
 set(ROARING_SRC
     array_util.c
     bitset_util.c
+    bitset.c
     containers/array.c
     containers/bitset.c
     containers/containers.c
diff --git a/src/bitset.c b/src/bitset.c
new file mode 100644
index 000000000..c493c0b14
--- /dev/null
+++ b/src/bitset.c
@@ -0,0 +1,442 @@
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <roaring/bitset/bitset.h>
+#include <roaring/portability.h>
+#include <roaring/memory.h>
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/* Create a new, empty bitset: no word array, zero size and zero capacity.
+ * Return NULL in case of failure. */
+bitset_t *bitset_create() {
+    bitset_t *fresh = (bitset_t *)roaring_malloc(sizeof(bitset_t));
+    if (fresh != NULL) {
+        /* Words are only allocated later, once the bitset is resized. */
+        fresh->capacity = 0;
+        fresh->arraysize = 0;
+        fresh->array = NULL;
+    }
+    return fresh;
+}
+
+/* Create a new bitset able to contain size bits, all of them unset. Return
+ * NULL in case of failure. */
+bitset_t *bitset_create_with_capacity(size_t size) {
+    bitset_t *bitset = (bitset_t *)roaring_malloc(sizeof(bitset_t));
+    if (bitset == NULL) return NULL;
+    /* Round up to whole 64-bit words. The usual (size + 63) / 64 wraps around
+     * for sizes close to SIZE_MAX and would silently under-allocate. */
+    bitset->arraysize = size / 64 + (size % 64 != 0 ? 1 : 0);
+    bitset->capacity = bitset->arraysize;
+    bitset->array =
+        (uint64_t *)roaring_calloc(bitset->arraysize, sizeof(uint64_t));
+    /* calloc may return NULL for a zero-sized request: that is not an error. */
+    if (bitset->array == NULL && bitset->arraysize != 0) {
+        roaring_free(bitset);
+        return NULL;
+    }
+    return bitset;
+}
+
+/* Create a deep copy of the bitset. Return NULL in case of failure. */
+bitset_t *bitset_copy(const bitset_t *bitset) {
+    bitset_t *copy = (bitset_t *)roaring_malloc(sizeof(bitset_t));
+    if (copy == NULL) return NULL;
+    memcpy(copy, bitset, sizeof(bitset_t));
+    copy->capacity = copy->arraysize;
+    copy->array = NULL;
+    /* Empty source: malloc(0) may return NULL and is not a failure here. */
+    if (copy->arraysize == 0) return copy;
+    copy->array = (uint64_t *)roaring_malloc(sizeof(uint64_t) * copy->arraysize);
+    if (copy->array == NULL) {
+        roaring_free(copy);
+        return NULL;
+    }
+    memcpy(copy->array, bitset->array, sizeof(uint64_t) * copy->arraysize);
+    return copy;
+}
+
+void bitset_clear(bitset_t *bitset) {  // unset every bit; guard memset, array may be NULL when empty
+    if (bitset->arraysize != 0) memset(bitset->array, 0, sizeof(uint64_t) * bitset->arraysize);
+}
+
+void bitset_fill(bitset_t *bitset) {  // set every bit; guard memset, array may be NULL when empty
+    if (bitset->arraysize != 0) memset(bitset->array, 0xff, sizeof(uint64_t) * bitset->arraysize);
+}
+
+/* Shift the bitset left by s bits, enlarging the array as needed. The bitset
+ * is left unchanged if the enlargement fails. */
+void bitset_shift_left(bitset_t *bitset, size_t s) {
+    size_t extra_words = s / 64;
+    int inword_shift = s % 64;
+    size_t as = bitset->arraysize;
+    if (inword_shift == 0) {
+        if (!bitset_resize(bitset, as + extra_words, false)) return;
+        for (size_t i = as + extra_words; i > extra_words; i--) {
+            bitset->array[i - 1] = bitset->array[i - 1 - extra_words];
+        }
+    } else {
+        if (!bitset_resize(bitset, as + extra_words + 1, true)) return;
+        /* An empty bitset has no last word to carry bits from. */
+        bitset->array[as + extra_words] =
+            (as == 0) ? 0 : bitset->array[as - 1] >> (64 - inword_shift);
+        for (size_t i = as + extra_words; i >= extra_words + 2; i--) {
+            bitset->array[i - 1] =
+                (bitset->array[i - 1 - extra_words] << inword_shift) |
+                (bitset->array[i - 2 - extra_words] >> (64 - inword_shift));
+        }
+        bitset->array[extra_words] = bitset->array[0] << inword_shift;
+    }
+    for (size_t i = 0; i < extra_words; i++) bitset->array[i] = 0;
+}
+
+/* Shift the bitset right by s bits; the array shrinks by s / 64 words. */
+void bitset_shift_right(bitset_t *bitset, size_t s) {
+    size_t extra_words = s / 64;
+    int inword_shift = s % 64;
+    size_t as = bitset->arraysize;
+    if (extra_words >= as) {
+        /* All words are shifted out; as - extra_words would wrap around below. */
+        bitset_resize(bitset, 0, false);
+    } else if (inword_shift == 0) {
+        memmove(bitset->array, bitset->array + extra_words,
+                sizeof(uint64_t) * (as - extra_words));
+        bitset_resize(bitset, as - extra_words, false);
+    } else {
+        for (size_t i = 0; i + extra_words + 1 < as; i++) {
+            bitset->array[i] =
+                (bitset->array[i + extra_words] >> inword_shift) |
+                (bitset->array[i + extra_words + 1] << (64 - inword_shift));
+        }
+        bitset->array[as - extra_words - 1] = bitset->array[as - 1] >> inword_shift;
+        bitset_resize(bitset, as - extra_words, false);
+    }
+}
+
+/* Free the word array and the bitset itself. A NULL bitset is ignored. */
+void bitset_free(bitset_t *bitset) {
+    if(bitset == NULL) { return; }
+    roaring_free(bitset->array);
+    roaring_free(bitset);
+}
+
+/* Resize the bitset so that it can support newarraysize * 64 bits. Words
+ * gained by the resize are zeroed only when padwithzeroes is set. Return
+ * true in case of success, false for failure. */
+bool bitset_resize(bitset_t *bitset, size_t newarraysize, bool padwithzeroes) {
+    if (newarraysize > SIZE_MAX / 64) return false;
+    const size_t oldsize = bitset->arraysize;
+    if (newarraysize > bitset->capacity) {
+        /* Double the capacity (starting from one word) until it fits. */
+        size_t wanted = (bitset->capacity == 0) ? 1 : bitset->capacity;
+        while (wanted < newarraysize) wanted *= 2;
+        uint64_t *grown = (uint64_t *)roaring_realloc(
+            bitset->array, sizeof(uint64_t) * wanted);
+        if (grown == NULL) return false;
+        bitset->array = grown;
+        bitset->capacity = wanted;
+    }
+    if (padwithzeroes && newarraysize > oldsize) {
+        memset(bitset->array + oldsize, 0,
+               sizeof(uint64_t) * (newarraysize - oldsize));
+    }
+    bitset->arraysize = newarraysize;
+    return true;
+}
+
+/* Return the number of set bits, i.e. the sum of the population counts of
+ * all the words. */
+size_t bitset_count(const bitset_t *bitset) {
+    const uint64_t *words = bitset->array;
+    const size_t len = bitset->arraysize;
+    size_t total = 0, pos = 0;
+    /* Main loop, manually unrolled: eight words per iteration. */
+    for (; pos + 7 < len; pos += 8) {
+        total += roaring_hamming(words[pos]) + roaring_hamming(words[pos + 1]);
+        total += roaring_hamming(words[pos + 2]) + roaring_hamming(words[pos + 3]);
+        total += roaring_hamming(words[pos + 4]) + roaring_hamming(words[pos + 5]);
+        total += roaring_hamming(words[pos + 6]) + roaring_hamming(words[pos + 7]);
+    }
+    /* At most seven words remain: at most one block of four... */
+    for (; pos + 3 < len; pos += 4) {
+        total += roaring_hamming(words[pos]) + roaring_hamming(words[pos + 1]);
+        total += roaring_hamming(words[pos + 2]) + roaring_hamming(words[pos + 3]);
+    }
+    /* ...followed by up to three single words. */
+    for (; pos < len; pos++) {
+        total += roaring_hamming(words[pos]);
+    }
+    return total;
+}
+
+/* Compute b1 |= b2. b1 is enlarged when b2 is the longer bitset; false is
+ * returned only if that enlargement fails. */
+bool bitset_inplace_union(bitset_t *CBITSET_RESTRICT b1,
+                          const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t n1 = b1->arraysize, n2 = b2->arraysize;
+    const size_t shared = (n2 < n1) ? n2 : n1;
+    for (size_t i = 0; i != shared; i++) {
+        b1->array[i] |= b2->array[i];
+    }
+    if (n2 <= n1) return true;
+    if (!bitset_resize(b1, n2, false)) return false;
+    /* The extra words of b2 have no counterpart in b1: copy them as is. */
+    memcpy(b1->array + n1, b2->array + n1, (n2 - n1) * sizeof(uint64_t));
+    return true;
+}
+
+/* Return the index of the lowest set bit. A bitset without any set bit
+ * yields 0, just like a bitset whose bit 0 is set. */
+size_t bitset_minimum(const bitset_t *bitset) {
+    const size_t len = bitset->arraysize;
+    size_t k = 0;
+    while (k < len && bitset->array[k] == 0) k++;  // skip the empty words
+    if (k == len) return 0;
+    return roaring_trailing_zeroes(bitset->array[k]) + k * 64;
+}
+
+/* Enlarge the bitset to newarraysize words, zeroing the words gained. Return
+ * false if that would shrink it, if the size is too large, or on failure. */
+bool bitset_grow(bitset_t *bitset, size_t newarraysize) {
+    const size_t oldsize = bitset->arraysize;
+    if (newarraysize < oldsize || newarraysize > SIZE_MAX / 64) return false;
+    if (newarraysize > bitset->capacity) {
+        size_t wanted = (UINT64_C(0xFFFFFFFFFFFFFFFF) >> roaring_leading_zeroes(newarraysize)) + 1;
+        while (wanted < newarraysize) wanted *= 2;
+        uint64_t *grown = (uint64_t *)roaring_realloc(bitset->array, sizeof(uint64_t) * wanted);
+        if (grown == NULL) return false;
+        bitset->array = grown;
+        bitset->capacity = wanted;
+    }
+    if (newarraysize > oldsize)  /* array may be NULL when nothing grows */
+        memset(bitset->array + oldsize, 0, sizeof(uint64_t) * (newarraysize - oldsize));
+    bitset->arraysize = newarraysize;
+    return true;
+}
+
+/* Return the index of the highest set bit. A bitset without any set bit
+ * yields 0, just like a bitset whose only set bit is bit 0. */
+size_t bitset_maximum(const bitset_t *bitset) {
+    size_t k = bitset->arraysize;
+    /* walk down from the last word until a non-empty one shows up */
+    while (k > 0 && bitset->array[k - 1] == 0) k--;
+    if (k == 0) return 0;
+    return 63 - roaring_leading_zeroes(bitset->array[k - 1]) + (k - 1) * 64;
+}
+
+/* Returns true if bitsets share no common elements, false otherwise.
+ *
+ * Only the words present in both bitsets are compared, and the scan stops at
+ * the first word where the two bitsets overlap. */
+bool bitsets_disjoint(const bitset_t *CBITSET_RESTRICT b1, const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t n1 = b1->arraysize;
+    const size_t n2 = b2->arraysize;
+    const size_t shared = (n2 < n1) ? n2 : n1;
+    size_t k = 0;
+    while (k < shared && (b1->array[k] & b2->array[k]) == 0) k++;
+    return k == shared;
+}
+
+/* Returns true if bitsets contain at least 1 common element, false if they are
+ * disjoint.
+ *
+ * Only the words present in both bitsets are compared, and the scan stops at
+ * the first word where the two bitsets overlap. */
+bool bitsets_intersect(const bitset_t *CBITSET_RESTRICT b1, const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t n1 = b1->arraysize;
+    const size_t n2 = b2->arraysize;
+    const size_t shared = (n2 < n1) ? n2 : n1;
+    size_t k = 0;
+    while (k < shared && (b1->array[k] & b2->array[k]) == 0) k++;
+    return k != shared;
+}
+
+/* Returns true if b has any bits set in or after b->array[starting_loc]. */
+static bool any_bits_set(const bitset_t *b, size_t starting_loc) {
+    /* A starting_loc at or past the end simply makes the loop a no-op. */
+    for (size_t k = starting_loc; k < b->arraysize; k++) {
+        if (b->array[k] != 0) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/* Returns true if b1 has all of b2's bits set.
+ *
+ * Performs early out if a bit is found in b2 that is not found in b1. */
+bool bitset_contains_all(const bitset_t *CBITSET_RESTRICT b1, const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t n1 = b1->arraysize;
+    const size_t n2 = b2->arraysize;
+    const size_t shared = (n1 > n2) ? n2 : n1;
+    for (size_t k = 0; k < shared; k++) {
+        const uint64_t missing = b2->array[k] & ~b1->array[k];
+        if (missing != 0) {
+            return false;
+        }
+    }
+    if (n2 <= n1) {
+        return true;
+    }
+    /* b2 is longer: its extra words must not have any bit set. */
+    return !any_bits_set(b2, n1);
+}
+
+/* Return the cardinality of the union of b1 and b2 without building it: the
+ * shared prefix is counted word by word on b1 | b2, then the words that only
+ * the longer bitset has are counted on their own. */
+size_t bitset_union_count(const bitset_t *CBITSET_RESTRICT b1,
+                          const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t n1 = b1->arraysize;
+    const size_t n2 = b2->arraysize;
+    const size_t shared = (n1 < n2) ? n1 : n2;
+    /* The longer bitset supplies the tail; on a tie the tail is empty. */
+    const uint64_t *tail = (n2 > n1) ? b2->array : b1->array;
+    const size_t tail_end = (n2 > n1) ? n2 : n1;
+    size_t total = 0, i = 0;
+    /* Shared prefix, four words per iteration. */
+    while (i + 3 < shared) {
+        total += roaring_hamming(b1->array[i] | b2->array[i]);
+        total += roaring_hamming(b1->array[i + 1] | b2->array[i + 1]);
+        total += roaring_hamming(b1->array[i + 2] | b2->array[i + 2]);
+        total += roaring_hamming(b1->array[i + 3] | b2->array[i + 3]);
+        i += 4;
+    }
+    /* Shared prefix, leftover words. */
+    while (i < shared) {
+        total += roaring_hamming(b1->array[i] | b2->array[i]);
+        i++;
+    }
+    /* Here i == shared. Tail, four words per iteration. */
+    while (i + 3 < tail_end) {
+        total += roaring_hamming(tail[i]);
+        total += roaring_hamming(tail[i + 1]);
+        total += roaring_hamming(tail[i + 2]);
+        total += roaring_hamming(tail[i + 3]);
+        i += 4;
+    }
+    /* Tail, leftover words. */
+    while (i < tail_end) {
+        total += roaring_hamming(tail[i]);
+        i++;
+    }
+    return total;
+}
+
+/* Compute b1 &= b2. The size of b1 is unchanged: its words beyond the end of
+ * b2 are zeroed. */
+void bitset_inplace_intersection(bitset_t *CBITSET_RESTRICT b1,
+                                 const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t n1 = b1->arraysize;
+    const size_t n2 = b2->arraysize;
+    const size_t shared = (n2 < n1) ? n2 : n1;
+    size_t i = 0;
+    for (; i != shared; i++) b1->array[i] &= b2->array[i];
+    /* words of b1 past the end of b2 intersect with nothing */
+    for (; i != n1; i++) b1->array[i] = 0;
+}
+
+/* Return the cardinality of the intersection of b1 and b2. */
+size_t bitset_intersection_count(const bitset_t *CBITSET_RESTRICT b1,
+                                 const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t n1 = b1->arraysize, n2 = b2->arraysize;
+    const size_t shared = (n2 < n1) ? n2 : n1;
+    size_t total = 0;
+    for (size_t i = 0; i != shared; i++)
+        total += roaring_hamming(b1->array[i] & b2->array[i]);
+    return total;
+}
+
+/* Compute b1 &= ~b2: remove from b1 every bit that is set in b2. */
+void bitset_inplace_difference(bitset_t *CBITSET_RESTRICT b1,
+                               const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t n1 = b1->arraysize, n2 = b2->arraysize;
+    const size_t shared = (n2 < n1) ? n2 : n1;
+    /* words of b1 past the end of b2 are kept untouched */
+    for (size_t i = 0; i != shared; i++)
+        b1->array[i] &= ~(b2->array[i]);
+}
+
+/* Return the number of bits set in b1 but not in b2. */
+size_t bitset_difference_count(const bitset_t *CBITSET_RESTRICT b1,
+                               const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t n1 = b1->arraysize, n2 = b2->arraysize;
+    const size_t shared = (n2 < n1) ? n2 : n1;
+    size_t total = 0;
+    size_t i = 0;
+    for (; i < shared; i++)
+        total += roaring_hamming(b1->array[i] & ~(b2->array[i]));
+    /* words of b1 past the end of b2 count in full */
+    for (; i < n1; i++)
+        total += roaring_hamming(b1->array[i]);
+    return total;
+}
+
+/* Compute b1 ^= b2. b1 is enlarged when b2 is the longer bitset; false is
+ * returned only if that enlargement fails. */
+bool bitset_inplace_symmetric_difference(bitset_t *CBITSET_RESTRICT b1,
+                                         const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t n1 = b1->arraysize;
+    const size_t n2 = b2->arraysize;
+    const size_t shared = (n2 < n1) ? n2 : n1;
+    for (size_t i = 0; i != shared; i++) {
+        b1->array[i] ^= b2->array[i];
+    }
+    if (n2 <= n1) return true;
+    if (!bitset_resize(b1, n2, false)) return false;
+    /* x ^ 0 == x: the extra words of b2 are copied as is. */
+    memcpy(b1->array + n1, b2->array + n1, (n2 - n1) * sizeof(uint64_t));
+    return true;
+}
+
+/* Return the number of bits set in exactly one of b1 and b2, without
+ * modifying either bitset. */
+size_t bitset_symmetric_difference_count(const bitset_t *CBITSET_RESTRICT b1,
+                                         const bitset_t *CBITSET_RESTRICT b2) {
+    const size_t n1 = b1->arraysize;
+    const size_t n2 = b2->arraysize;
+    const size_t shared = (n2 < n1) ? n2 : n1;
+    /* Past the shared prefix, only the longer bitset still has words. */
+    const uint64_t *tail = (n2 > n1) ? b2->array : b1->array;
+    const size_t tail_end = (n2 > n1) ? n2 : n1;
+    size_t total = 0;
+    size_t i = 0;
+    for (; i < shared; i++) {
+        total += roaring_hamming(b1->array[i] ^ b2->array[i]);
+    }
+    for (; i < tail_end; i++) {
+        total += roaring_hamming(tail[i]);
+    }
+    return total;
+}
+
+/* Drop trailing zero words and shrink the allocation. False on failure. */
+bool bitset_trim(bitset_t *bitset) {
+    size_t newsize = bitset->arraysize;
+    while (newsize > 0 && bitset->array[newsize - 1] == 0) newsize--;
+    if (bitset->capacity == newsize) return true;  // nothing to do
+    uint64_t *newarray = NULL;
+    if (newsize == 0) {
+        /* realloc(ptr, 0) may free ptr and still return NULL, which would be
+         * mistaken for a failure and leave a dangling array: free instead. */
+        roaring_free(bitset->array);
+    } else if ((newarray = (uint64_t *)roaring_realloc(
+                    bitset->array, sizeof(uint64_t) * newsize)) == NULL) {
+        return false;
+    }
+    bitset->array = newarray;
+    bitset->capacity = newsize;
+    bitset->arraysize = newsize;
+    return true;
+}
+
+
+#ifdef __cplusplus
+} } }  // extern "C" { namespace roaring { namespace internal {
+#endif
diff --git a/src/bitset_util.c b/src/bitset_util.c
index f1a04dcec..038728589 100644
--- a/src/bitset_util.c
+++ b/src/bitset_util.c
@@ -7,7 +7,7 @@
 #include <roaring/bitset_util.h>
 
 #ifdef __cplusplus
-extern "C" { namespace roaring { namespace internal {
+extern "C" { namespace roaring { namespace api {
 #endif
 
 #ifdef CROARING_IS_X64
@@ -570,7 +570,7 @@ size_t bitset_extract_setbits_avx512(const uint64_t *words, size_t length, uint3
         uint64_t v = words[i];		
         __m512i vec = _mm512_maskz_compress_epi8(v, index_table);	
         	    
-        uint8_t advance = hamming(v);
+        uint8_t advance = roaring_hamming(v);
         
         __m512i vbase = _mm512_add_epi32(base_v, _mm512_set1_epi32(i * 64));
         __m512i r1 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec,0));
@@ -597,7 +597,7 @@ size_t bitset_extract_setbits_avx512(const uint64_t *words, size_t length, uint3
          uint64_t w = words[i];
          while ((w != 0) && (out < safeout)) {
              uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
-             int r = __builtin_ctzll(w); // on x64, should compile to TZCNT
+             int r = roaring_trailing_zeroes(w); // on x64, should compile to TZCNT
              uint32_t val = r + base;
              memcpy(out, &val,
                     sizeof(uint32_t));  // should be compiled as a MOV on x64
@@ -628,7 +628,7 @@ size_t bitset_extract_setbits_avx512_uint16(const uint64_t *array, size_t length
         uint64_t v = array[i];
         __m512i vec = _mm512_maskz_compress_epi8(v, index_table);
 
-        uint8_t advance = hamming(v);
+        uint8_t advance = roaring_hamming(v);
 
         __m512i vbase = _mm512_add_epi16(base_v, _mm512_set1_epi16(i * 64));
         __m512i r1 = _mm512_cvtepi8_epi16(_mm512_extracti32x8_epi32(vec,0));
@@ -649,7 +649,7 @@ size_t bitset_extract_setbits_avx512_uint16(const uint64_t *array, size_t length
          uint64_t w = array[i];
          while ((w != 0) && (out < safeout)) {
              uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
-             int r = __builtin_ctzll(w); // on x64, should compile to TZCNT
+             int r = roaring_trailing_zeroes(w); // on x64, should compile to TZCNT
              uint32_t val = r + base;
              memcpy(out, &val,
                     sizeof(uint16_t));
@@ -705,7 +705,7 @@ size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
         uint64_t w = words[i];
         while ((w != 0) && (out < safeout)) {
             uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
-            int r = __builtin_ctzll(w); // on x64, should compile to TZCNT
+            int r = roaring_trailing_zeroes(w); // on x64, should compile to TZCNT
             uint32_t val = r + base;
             memcpy(out, &val,
                    sizeof(uint32_t));  // should be compiled as a MOV on x64
@@ -726,7 +726,7 @@ size_t bitset_extract_setbits(const uint64_t *words, size_t length,
         uint64_t w = words[i];
         while (w != 0) {
             uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
-            int r = __builtin_ctzll(w); // on x64, should compile to TZCNT
+            int r = roaring_trailing_zeroes(w); // on x64, should compile to TZCNT
             uint32_t val = r + base;
             memcpy(out + outpos, &val,
                    sizeof(uint32_t));  // should be compiled as a MOV on x64
@@ -747,7 +747,7 @@ size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__
         uint64_t w = words1[i] & words2[i];
         while (w != 0) {
             uint64_t t = w & (~w + 1);
-            int r = __builtin_ctzll(w);
+            int r = roaring_trailing_zeroes(w);
             out[outpos++] = r + base;
             w ^= t;
         }
@@ -811,7 +811,7 @@ size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length,
         uint64_t w = words[i];
         while ((w != 0) && (out < safeout)) {
             uint64_t t = w & (~w + 1);
-            int r = __builtin_ctzll(w);
+            int r = roaring_trailing_zeroes(w);
             *out = r + base;
             out++;
             w ^= t;
@@ -839,7 +839,7 @@ size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length,
         uint64_t w = words[i];
         while (w != 0) {
             uint64_t t = w & (~w + 1);
-            int r = __builtin_ctzll(w);
+            int r = roaring_trailing_zeroes(w);
             out[outpos++] = r + base;
             w ^= t;
         }
@@ -1117,5 +1117,5 @@ void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length) {
 }
 
 #ifdef __cplusplus
-} } }  // extern "C" { namespace roaring { namespace internal {
+} } }  // extern "C" { namespace roaring { namespace api {
 #endif
diff --git a/src/containers/bitset.c b/src/containers/bitset.c
index 59874aeaa..81fd97707 100644
--- a/src/containers/bitset.c
+++ b/src/containers/bitset.c
@@ -248,10 +248,10 @@ static inline int _scalar_bitset_container_compute_cardinality(const bitset_cont
   const uint64_t *words = bitset->words;
   int32_t sum = 0;
   for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) {
-          sum += hamming(words[i]);
-          sum += hamming(words[i + 1]);
-          sum += hamming(words[i + 2]);
-          sum += hamming(words[i + 3]);
+          sum += roaring_hamming(words[i]);
+          sum += roaring_hamming(words[i + 1]);
+          sum += roaring_hamming(words[i + 2]);
+          sum += roaring_hamming(words[i + 3]);
   }
   return sum;
 }
@@ -274,7 +274,7 @@ int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
     }
 }
 
-#elif defined(USENEON)
+#elif defined(CROARING_USENEON)
 int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
     uint16x8_t n0 = vdupq_n_u16(0);
     uint16x8_t n1 = vdupq_n_u16(0);
@@ -305,10 +305,10 @@ int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
     const uint64_t *words = bitset->words;
     int32_t sum = 0;
     for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) {
-        sum += hamming(words[i]);
-        sum += hamming(words[i + 1]);
-        sum += hamming(words[i + 2]);
-        sum += hamming(words[i + 3]);
+        sum += roaring_hamming(words[i]);
+        sum += roaring_hamming(words[i + 1]);
+        sum += roaring_hamming(words[i + 2]);
+        sum += roaring_hamming(words[i + 3]);
     }
     return sum;
 }
@@ -333,7 +333,7 @@ int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
   static inline int _avx512_bitset_container_##opname##_nocard(                \
       const bitset_container_t *src_1, const bitset_container_t *src_2,        \
       bitset_container_t *dst) {                                               \
-        const uint8_t * __restrict__ words_1 = (const uint8_t *)src_1->words;  \
+    const uint8_t * __restrict__ words_1 = (const uint8_t *)src_1->words;      \
     const uint8_t * __restrict__ words_2 = (const uint8_t *)src_2->words;      \
     /* not using the blocking optimization for some reason*/                   \
     uint8_t *out = (uint8_t*)dst->words;                                       \
@@ -419,17 +419,17 @@ CROARING_UNTARGET_REGION
 
 // we duplicate the function because other containers use the "intersection" term, makes API more consistent
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, and,          &, _mm512_or_si512, vandq_u64, CROARING_UNTARGET_REGION)
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, and,          &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
 CROARING_UNTARGET_REGION
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, intersection, &, _mm512_or_si512, vandq_u64, CROARING_UNTARGET_REGION)
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
 CROARING_UNTARGET_REGION
 
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, xor,    ^,  _mm512_or_si512,    veorq_u64, CROARING_UNTARGET_REGION)
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, xor,    ^,  _mm512_xor_si512,    veorq_u64, CROARING_UNTARGET_REGION)
 CROARING_UNTARGET_REGION
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, andnot, &~, _mm512_or_si512, vbicq_u64, CROARING_UNTARGET_REGION)
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_REGION)
 CROARING_UNTARGET_REGION
 
 // we duplicate the function because other containers use the "or" term, makes API more consistent
@@ -442,17 +442,17 @@ CROARING_UNTARGET_REGION
 
 // we duplicate the function because other containers use the "intersection" term, makes API more consistent
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, and,          &, _mm512_or_si512, vandq_u64, CROARING_UNTARGET_REGION)
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, and,          &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
 CROARING_UNTARGET_REGION
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, intersection, &, _mm512_or_si512, vandq_u64, CROARING_UNTARGET_REGION)
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
 CROARING_UNTARGET_REGION
 
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, xor,    ^,  _mm512_or_si512,    veorq_u64, CROARING_UNTARGET_REGION)
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, xor,    ^,  _mm512_xor_si512,    veorq_u64, CROARING_UNTARGET_REGION)
 CROARING_UNTARGET_REGION
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, andnot, &~, _mm512_or_si512, vbicq_u64, CROARING_UNTARGET_REGION)
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_REGION)
 CROARING_UNTARGET_REGION
 
 // we duplicate the function because other containers use the "or" term, makes API more consistent
@@ -465,17 +465,17 @@ CROARING_UNTARGET_REGION
 
 // we duplicate the function because other containers use the "intersection" term, makes API more consistent
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, and,          &, _mm512_or_si512, vandq_u64, CROARING_UNTARGET_REGION)
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, and,          &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
 CROARING_UNTARGET_REGION
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, intersection, &, _mm512_or_si512, vandq_u64, CROARING_UNTARGET_REGION)
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
 CROARING_UNTARGET_REGION
 
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, xor,    ^,  _mm512_or_si512,    veorq_u64, CROARING_UNTARGET_REGION)
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, xor,    ^,  _mm512_xor_si512,    veorq_u64, CROARING_UNTARGET_REGION)
 CROARING_UNTARGET_REGION
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, andnot, &~, _mm512_or_si512, vbicq_u64, CROARING_UNTARGET_REGION)
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_REGION)
 CROARING_UNTARGET_REGION
 #endif // CROARING_COMPILER_SUPPORTS_AVX512
 
@@ -654,8 +654,8 @@ CROARING_UNTARGET_REGION
                      word_2 = (words_1[i + 1]) opsymbol(words_2[i + 1]);       \
       out[i] = word_1;                                                         \
       out[i + 1] = word_2;                                                     \
-      sum += hamming(word_1);                                                  \
-      sum += hamming(word_2);                                                  \
+      sum += roaring_hamming(word_1);                                                  \
+      sum += roaring_hamming(word_2);                                                  \
     }                                                                          \
     dst->cardinality = sum;                                                    \
     return dst->cardinality;                                                   \
@@ -680,8 +680,8 @@ CROARING_UNTARGET_REGION
     for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) {           \
       const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]),                \
                      word_2 = (words_1[i + 1]) opsymbol(words_2[i + 1]);       \
-      sum += hamming(word_1);                                                  \
-      sum += hamming(word_2);                                                  \
+      sum += roaring_hamming(word_1);                                                  \
+      sum += roaring_hamming(word_2);                                                  \
     }                                                                          \
     return sum;                                                                \
   }
@@ -770,7 +770,7 @@ SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
 
 #endif //  CROARING_COMPILER_SUPPORTS_AVX512
 
-#elif defined(USENEON)
+#elif defined(CROARING_USENEON)
 
 #define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic)  \
 int bitset_container_##opname(const bitset_container_t *src_1,                \
@@ -873,8 +873,8 @@ int bitset_container_##opname(const bitset_container_t *src_1,            \
                        word_2 = (words_1[i + 1])opsymbol(words_2[i + 1]); \
         out[i] = word_1;                                                  \
         out[i + 1] = word_2;                                              \
-        sum += hamming(word_1);                                    \
-        sum += hamming(word_2);                                    \
+        sum += roaring_hamming(word_1);                                    \
+        sum += roaring_hamming(word_2);                                    \
     }                                                                     \
     dst->cardinality = sum;                                               \
     return dst->cardinality;                                              \
@@ -899,8 +899,8 @@ int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
     for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) {      \
         const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]),         \
                        word_2 = (words_1[i + 1])opsymbol(words_2[i + 1]); \
-        sum += hamming(word_1);                                    \
-        sum += hamming(word_2);                                    \
+        sum += roaring_hamming(word_1);                                    \
+        sum += roaring_hamming(word_2);                                    \
     }                                                                     \
     return sum;                                                           \
 }
@@ -956,7 +956,7 @@ void bitset_container_printf(const bitset_container_t * v) {
 		uint64_t w = v->words[i];
 		while (w != 0) {
 			uint64_t t = w & (~w + 1);
-			int r = __builtin_ctzll(w);
+			int r = roaring_trailing_zeroes(w);
 			if(iamfirst) {// predicted to be false
 				printf("%u",base + r);
 				iamfirst = false;
@@ -980,7 +980,7 @@ void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint3
 		uint64_t w = v->words[i];
 		while (w != 0) {
 			uint64_t t = w & (~w + 1);
-			int r = __builtin_ctzll(w);
+			int r = roaring_trailing_zeroes(w);
 			if(iamfirst) {// predicted to be false
 				printf("%u", r + base);
 				iamfirst = false;
@@ -1002,11 +1002,11 @@ int bitset_container_number_of_runs(bitset_container_t *bc) {
   for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS-1; ++i) {
     uint64_t word = next_word;
     next_word = bc->words[i+1];
-    num_runs += hamming((~word) & (word << 1)) + ( (word >> 63) & ~next_word);
+    num_runs += roaring_hamming((~word) & (word << 1)) + ( (word >> 63) & ~next_word);
   }
 
   uint64_t word = next_word;
-  num_runs += hamming((~word) & (word << 1));
+  num_runs += roaring_hamming((~word) & (word << 1));
   if((word & 0x8000000000000000ULL) != 0)
     num_runs++;
   return num_runs;
@@ -1032,7 +1032,7 @@ bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roa
     uint64_t w = cont->words[i];
     while (w != 0) {
       uint64_t t = w & (~w + 1);
-      int r = __builtin_ctzll(w);
+      int r = roaring_trailing_zeroes(w);
       if(!iterator(r + base, ptr)) return false;
       w ^= t;
     }
@@ -1046,7 +1046,7 @@ bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, r
     uint64_t w = cont->words[i];
     while (w != 0) {
       uint64_t t = w & (~w + 1);
-      int r = __builtin_ctzll(w);
+      int r = roaring_trailing_zeroes(w);
       if(!iterator(high_bits | (uint64_t)(r + base), ptr)) return false;
       w ^= t;
     }
@@ -1142,13 +1142,13 @@ bool bitset_container_select(const bitset_container_t *container, uint32_t *star
     const uint64_t *words = container->words;
     int32_t size;
     for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 1) {
-        size = hamming(words[i]);
+        size = roaring_hamming(words[i]);
         if(rank <= *start_rank + size) {
             uint64_t w = container->words[i];
             uint16_t base = i*64;
             while (w != 0) {
                 uint64_t t = w & (~w + 1);
-                int r = __builtin_ctzll(w);
+                int r = roaring_trailing_zeroes(w);
                 if(*start_rank == rank) {
                     *element = r+base;
                     return true;
@@ -1161,7 +1161,7 @@ bool bitset_container_select(const bitset_container_t *container, uint32_t *star
             *start_rank += size;
     }
     assert(false);
-    __builtin_unreachable();
+    roaring_unreachable;
 }
 
 
@@ -1170,7 +1170,7 @@ uint16_t bitset_container_minimum(const bitset_container_t *container) {
   for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
     uint64_t w = container->words[i];
     if (w != 0) {
-      int r = __builtin_ctzll(w);
+      int r = roaring_trailing_zeroes(w);
       return r + i * 64;
     }
   }
@@ -1182,7 +1182,7 @@ uint16_t bitset_container_maximum(const bitset_container_t *container) {
   for (int32_t i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i > 0; --i ) {
     uint64_t w = container->words[i];
     if (w != 0) {
-      int r = __builtin_clzll(w);
+      int r = roaring_leading_zeroes(w);
       return i * 64 + 63  - r;
     }
   }
@@ -1195,12 +1195,12 @@ int bitset_container_rank(const bitset_container_t *container, uint16_t x) {
   int sum = 0;
   int i = 0;
   for (int end = x / 64; i < end; i++){
-    sum += hamming(container->words[i]);
+    sum += roaring_hamming(container->words[i]);
   }
   uint64_t lastword = container->words[i];
   uint64_t lastpos = UINT64_C(1) << (x % 64);
   uint64_t mask = lastpos + lastpos - 1; // smear right
-  sum += hamming(lastword & mask);
+  sum += roaring_hamming(lastword & mask);
   return sum;
 }
 
@@ -1216,7 +1216,7 @@ int bitset_container_index_equalorlarger(const bitset_container_t *container, ui
     if(k == BITSET_CONTAINER_SIZE_IN_WORDS) return -1;
     word = container->words[k];
   }
-  return k * 64 + __builtin_ctzll(word);
+  return k * 64 + roaring_trailing_zeroes(word);
 }
 
 #ifdef __cplusplus
diff --git a/src/containers/containers.c b/src/containers/containers.c
index a1fb598c6..c2fd32942 100644
--- a/src/containers/containers.c
+++ b/src/containers/containers.c
@@ -51,7 +51,7 @@ void container_free(container_t *c, uint8_t type) {
             break;
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
     }
 }
 
@@ -68,7 +68,7 @@ void container_printf(const container_t *c, uint8_t type) {
             run_container_printf(const_CAST_run(c));
             return;
         default:
-            __builtin_unreachable();
+            roaring_unreachable;
     }
 }
 
@@ -91,7 +91,7 @@ void container_printf_as_uint32_array(
                 const_CAST_run(c), base);
             return;
         default:
-            __builtin_unreachable();
+            roaring_unreachable;
     }
 }
 
@@ -180,7 +180,7 @@ container_t *container_clone(const container_t *c, uint8_t typecode) {
             return NULL;
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
             return NULL;
     }
 }
diff --git a/src/containers/convert.c b/src/containers/convert.c
index 4c9db7238..ec3b94ceb 100644
--- a/src/containers/convert.c
+++ b/src/containers/convert.c
@@ -290,7 +290,7 @@ container_t *convert_run_optimize(
                 return answer;
             }
 
-            int local_run_start = __builtin_ctzll(cur_word);
+            int local_run_start = roaring_trailing_zeroes(cur_word);
             int run_start = local_run_start + 64 * long_ctr;
             uint64_t cur_word_with_1s = cur_word | (cur_word - 1);
 
@@ -306,7 +306,7 @@ container_t *convert_run_optimize(
                 *typecode_after = RUN_CONTAINER_TYPE;
                 return answer;
             }
-            int local_run_end = __builtin_ctzll(~cur_word_with_1s);
+            int local_run_end = roaring_trailing_zeroes(~cur_word_with_1s);
             run_end = local_run_end + long_ctr * 64;
             add_run(answer, run_start, run_end - 1);
             cur_word = cur_word_with_1s & (cur_word_with_1s + 1);
@@ -314,7 +314,7 @@ container_t *convert_run_optimize(
         return answer;
     } else {
         assert(false);
-        __builtin_unreachable();
+        roaring_unreachable;
         return NULL;
     }
 }
diff --git a/src/containers/mixed_equal.c b/src/containers/mixed_equal.c
index fdf4c2517..120d900b6 100644
--- a/src/containers/mixed_equal.c
+++ b/src/containers/mixed_equal.c
@@ -16,7 +16,7 @@ bool array_container_equal_bitset(const array_container_t* container1,
         uint64_t w = container2->words[i];
         while (w != 0) {
             uint64_t t = w & (~w + 1);
-            uint16_t r = i * 64 + __builtin_ctzll(w);
+            uint16_t r = i * 64 + roaring_trailing_zeroes(w);
             if (pos >= container1->cardinality) {
                 return false;
             }
diff --git a/src/containers/mixed_subset.c b/src/containers/mixed_subset.c
index 86fe098f5..695d49731 100644
--- a/src/containers/mixed_subset.c
+++ b/src/containers/mixed_subset.c
@@ -108,7 +108,7 @@ bool bitset_container_is_subset_run(const bitset_container_t* container1,
             uint32_t start = container2->runs[i_run].value;
             uint32_t stop = start + container2->runs[i_run].length;
             uint64_t t = w & (~w + 1);
-            uint16_t r = i_bitset * 64 + __builtin_ctzll(w);
+            uint16_t r = i_bitset * 64 + roaring_trailing_zeroes(w);
             if (r < start) {
                 return false;
             } else if (r > stop) {
diff --git a/src/roaring.c b/src/roaring.c
index d75fb5cb7..6557ff45a 100644
--- a/src/roaring.c
+++ b/src/roaring.c
@@ -419,7 +419,7 @@ void roaring_bitmap_statistics(const roaring_bitmap_t *r,
                 break;
             default:
                 assert(false);
-                __builtin_unreachable();
+                roaring_unreachable;
         }
     }
 }
@@ -454,6 +454,7 @@ bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
 }
 
 void roaring_bitmap_free(const roaring_bitmap_t *r) {
+    if(r == NULL) { return; }
     if (!is_frozen(r)) {
       ra_clear((roaring_array_t*)&r->high_low_container);
     }
@@ -1558,7 +1559,7 @@ static bool loadfirstvalue(roaring_uint32_iterator_t *newit) {
                 wordindex++;  // advance
             }
             // here "word" is non-zero
-            newit->in_container_index = wordindex * 64 + __builtin_ctzll(word);
+            newit->in_container_index = wordindex * 64 + roaring_trailing_zeroes(word);
             newit->current_value = newit->highbits | newit->in_container_index;
             break; }
 
@@ -1591,7 +1592,7 @@ static bool loadlastvalue(roaring_uint32_iterator_t* newit) {
             while ((word = bitset_container->words[wordindex]) == 0)
                 --wordindex;
 
-            int num_leading_zeros = __builtin_clzll(word);
+            int num_leading_zeros = roaring_leading_zeroes(word);
             newit->in_container_index = (wordindex * 64) + (63 - num_leading_zeros);
             newit->current_value = newit->highbits | newit->in_container_index;
             break;
@@ -1650,7 +1651,7 @@ static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32
             break; }
 
         default:
-            __builtin_unreachable();
+            roaring_unreachable;
     }
 
     return true;
@@ -1734,7 +1735,7 @@ bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) {
                 word = bc->words[wordindex];
             }
             if (word != 0) {
-                it->in_container_index = wordindex * 64 + __builtin_ctzll(word);
+                it->in_container_index = wordindex * 64 + roaring_trailing_zeroes(word);
                 it->current_value = it->highbits | it->in_container_index;
                 return (it->has_value = true);
             }
@@ -1771,7 +1772,7 @@ bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) {
         }
 
         default:
-            __builtin_unreachable();
+            roaring_unreachable;
     }
 
     // moving to next container
@@ -1803,7 +1804,7 @@ bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it) {
             if (word == 0)
                 break;
 
-            int num_leading_zeros = __builtin_clzll(word);
+            int num_leading_zeros = roaring_leading_zeroes(word);
             it->in_container_index = (wordindex * 64) + (63 - num_leading_zeros);
             it->current_value = it->highbits | it->in_container_index;
             return (it->has_value = true);
@@ -1859,7 +1860,7 @@ uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* b
         word = bcont->words[wordindex] & (UINT64_MAX << (it->in_container_index % 64));
         do {
           while (word != 0 && ret < count) {
-            buf[0] = it->highbits | (wordindex * 64 + __builtin_ctzll(word));
+            buf[0] = it->highbits | (wordindex * 64 + roaring_trailing_zeroes(word));
             word = word & (word - 1);
             buf++;
             ret++;
@@ -1871,7 +1872,7 @@ uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* b
         } while (word != 0 && ret < count);
         it->has_value = (word != 0);
         if (it->has_value) {
-          it->in_container_index = wordindex * 64 + __builtin_ctzll(word);
+          it->in_container_index = wordindex * 64 + roaring_trailing_zeroes(word);
           it->current_value = it->highbits | it->in_container_index;
         }
         break;
@@ -2965,7 +2966,7 @@ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) {
                 break;
             }
             default:
-                __builtin_unreachable();
+                roaring_unreachable;
         }
     }
     num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes
@@ -3009,7 +3010,7 @@ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) {
                 break;
             }
             default:
-                __builtin_unreachable();
+                roaring_unreachable;
         }
     }
 
@@ -3055,7 +3056,7 @@ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) {
                 break;
             }
             default:
-                __builtin_unreachable();
+                roaring_unreachable;
         }
         memcpy(&count_zone[i], &count, 2);
     }
@@ -3347,6 +3348,65 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
     return rb;
 }
 
+/**
+ * Writes the values of the roaring bitmap `r` into the uncompressed `bitset`.
+ *
+ * The bitset is resized (via bitset_resize) so that it can hold the largest
+ * value of `r`, then each container is written at the word offset derived
+ * from its key (key * 1024 words).
+ *
+ * Returns false if the resize fails, true otherwise.
+ */
+bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t * bitset) {
+    uint32_t max_value = roaring_bitmap_maximum(r);
+    // Bit `max_value` is stored in word max_value / 64, so we need
+    // max_value / 64 + 1 words. Rounding up with (max_value + 63) / 64 is one
+    // word short whenever max_value is a multiple of 64 (e.g., 64), which
+    // leads to out-of-bounds writes or a truncated copy below.
+    size_t new_array_size = (size_t)((uint64_t)max_value / 64 + 1);
+    bool resize_ok = bitset_resize(bitset, new_array_size, true);
+    if(!resize_ok) { return false; }
+    const roaring_array_t *ra = &r->high_low_container;
+    for (int i = 0; i < ra->size; ++i) {
+        // Container i starts at word keys[i] * 1024 of the output array.
+        uint64_t* words = bitset->array + (ra->keys[i]<<10);
+        uint8_t type = ra->typecodes[i];
+        const container_t *c = ra->containers[i];
+        if(type == SHARED_CONTAINER_TYPE) {
+            c = container_unwrap_shared(c, &type);
+        }
+        switch (type) {
+          case BITSET_CONTAINER_TYPE:
+          {
+            // The last container may extend past the end of the bitset:
+            // copy only the words that fit (at most 1024).
+            size_t max_word_index = new_array_size - (ra->keys[i]<<10);
+            if(max_word_index > 1024) { max_word_index = 1024; }
+            const bitset_container_t *src = const_CAST_bitset(c);
+            memcpy(words, src->words, max_word_index * sizeof(uint64_t));
+          }
+          break;
+          case ARRAY_CONTAINER_TYPE:
+          {
+            const array_container_t *src = const_CAST_array(c);
+            bitset_set_list(words, src->array, src->cardinality);
+          }
+          break;
+          case RUN_CONTAINER_TYPE:
+          {
+            const run_container_t *src = const_CAST_run(c);
+            for (int32_t rlepos = 0; rlepos < src->n_runs; ++rlepos) {
+                rle16_t rle = src->runs[rlepos];
+                bitset_set_lenrange(words, rle.value, rle.length);
+            }
+          }
+          break;
+          default:
+          roaring_unreachable;
+        }
+    }
+    return true;
+}
 
 #ifdef __cplusplus
 } } }  // extern "C" { namespace roaring {
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 724c0047c..41fe0e6c2 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -9,6 +9,7 @@ add_cpp_test(cpp_random_unit)
 add_cpp_test(cpp_example1)
 add_cpp_test(cpp_example2)
 add_c_test(c_example1)
+add_c_test(cbitset_unit)
 add_c_test(array_container_unit)
 add_c_test(bitset_container_unit)
 add_c_test(mixed_container_unit)
diff --git a/tests/bitset_container_unit.c b/tests/bitset_container_unit.c
index 64d752d5b..b92279893 100644
--- a/tests/bitset_container_unit.c
+++ b/tests/bitset_container_unit.c
@@ -17,6 +17,17 @@
 
 #include "test.h"
 
+DEFINE_TEST(hamming_test) {
+  assert_true(roaring_hamming(0xffffffffffffffffULL) == 64);
+  for(int k = 0; k < 64; k++) {
+    assert_true(roaring_hamming(1ULL<<k) == 1);
+  }
+  for(int k = 0; k < 64; k++) {
+    for(int l = 0; l < 64; l++) {
+       assert_true(roaring_hamming((1ULL<<k)|(1ULL<<l)) == 2-(k==l));
+    }
+  }
+}
 
 DEFINE_TEST(test_bitset_lenrange_cardinality) {
   uint64_t words[] = {~UINT64_C(0), ~UINT64_C(0), ~UINT64_C(0), ~UINT64_C(0), 0, 0, 0, 0};
@@ -93,34 +104,81 @@ DEFINE_TEST(and_or_test) {
     bitset_container_t* B2 = bitset_container_create();
     bitset_container_t* BI = bitset_container_create();
     bitset_container_t* BO = bitset_container_create();
+    assert_true(bitset_container_compute_cardinality(B1) == 0);
+    assert_true(bitset_container_compute_cardinality(B2) == 0);
+    assert_true(bitset_container_compute_cardinality(BI) == 0);
+    assert_true(bitset_container_compute_cardinality(BO) == 0);
 
     assert_non_null(B1);
     assert_non_null(B2);
     assert_non_null(BI);
     assert_non_null(BO);
 
-    for (size_t x = 0; x < (1 << 16); x += 3) {
+    size_t max_value = 60000;
+
+    size_t b1_count = 0;
+    size_t bi_count = 0;
+    for (size_t x = 0; x < max_value; x += 3) {
         bitset_container_set(B1, x);
         bitset_container_set(BI, x);
+        b1_count++;
+        bi_count++;
+    }
+    for (size_t x = 0; x < max_value; x += 3) {
+        assert_true(bitset_container_get(B1, x));
+        assert_true(bitset_container_get(BI, x));
     }
 
+    assert_true(bitset_container_compute_cardinality(B1) == b1_count);
+    assert_true(bitset_container_compute_cardinality(BI) == bi_count);
+
+    size_t b2_count = 0;
     // important: 62 is not divisible by 3
-    for (size_t x = 0; x < (1 << 16); x += 62) {
+    for (size_t x = 0; x < max_value; x += 62) {
+        bi_count += !bitset_container_get(BI, x);
+
         bitset_container_set(B2, x);
         bitset_container_set(BI, x);
+        b2_count++;
     }
 
-    for (size_t x = 0; x < (1 << 16); x += 62 * 3) {
+    assert_true(bitset_container_compute_cardinality(B2) == b2_count);
+    assert_true(bitset_container_compute_cardinality(BI) == bi_count);
+    size_t bo_count = 0;
+    for (size_t x = 0; x < max_value; x += 62 * 3) {
         bitset_container_set(BO, x);
+        bo_count++;
     }
 
+    assert_true(bitset_container_compute_cardinality(BO) == bo_count);
+    assert_true(bitset_container_compute_cardinality(BI) == bi_count);
     const int card_union = bitset_container_compute_cardinality(BI);
     const int card_inter = bitset_container_compute_cardinality(BO);
-
-    bitset_container_and_nocard(B1, B2, BI);
+    assert_true(bitset_container_compute_cardinality(BI) == card_union);
+    assert_true(bi_count == card_union);
+    assert_true(bitset_container_compute_cardinality(BO) == bo_count);
+    assert_true(bitset_container_compute_cardinality(BO) == bitset_container_compute_cardinality(BO));
+    assert_true(card_inter == bo_count);
+    bitset_container_printf(B1);  // does it crash?
+    bitset_container_printf(B2);  // does it crash?
+    bitset_container_printf(BI);  // does it crash?
+
+    bitset_container_andnot_nocard(B1, B2, BI);
+
+    bitset_container_printf(B1);  // does it crash?
+    bitset_container_printf(B2);  // does it crash?
+    bitset_container_printf(BI);  // does it crash?
+    size_t interc = 0;
+    for (size_t x = 0; x < max_value; x ++) {
+        bool in1 = bitset_container_get(B1, x);
+        bool in2 = bitset_container_get(B2, x);
+        bool ini = bitset_container_get(BI, x);
+        assert_true(ini == (in1 & !in2));
+        interc += ini;
+    }
+    assert_true(bitset_container_compute_cardinality(BI) == interc);
     assert_int_not_equal(bitset_container_compute_cardinality(BI), card_union);
     assert_int_not_equal(bitset_container_and(B1, B2, BI), card_union);
-
     bitset_container_or_nocard(B1, B2, BO);
     assert_int_not_equal(bitset_container_compute_cardinality(BO), card_inter);
     assert_int_not_equal(bitset_container_or(B1, B2, BO), card_inter);
@@ -255,6 +313,7 @@ DEFINE_TEST(select_test) {
 
 int main() {
     const struct CMUnitTest tests[] = {
+        cmocka_unit_test(hamming_test),
         cmocka_unit_test(test_bitset_lenrange_cardinality),
         cmocka_unit_test(printf_test), cmocka_unit_test(set_get_test),
         cmocka_unit_test(and_or_test), cmocka_unit_test(xor_test),
diff --git a/tests/cbitset_unit.c b/tests/cbitset_unit.c
new file mode 100644
index 000000000..512854410
--- /dev/null
+++ b/tests/cbitset_unit.c
@@ -0,0 +1,277 @@
+#include <assert.h>
+#include <roaring/bitset/bitset.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "test.h"
+
+int compute_cardinality(bitset_t *b) {
+    size_t k = 0;
+    for (size_t i = 0; bitset_next_set_bit(b, &i); i++) {
+        k += 1;
+    }
+    return k;
+}
+
+void test_iterate() {
+    bitset_t *b = bitset_create();
+    for (int k = 0; k < 1000; ++k) bitset_set(b, 3 * k);
+    assert_true(bitset_count(b) == 1000);
+    assert_true(compute_cardinality(b) == 1000);
+    size_t k = 0;
+    for (size_t i = 0; bitset_next_set_bit(b, &i); i++) {
+        assert_true(i == k);
+        k += 3;
+    }
+    assert_true(k == 3000);
+    bitset_free(b);
+}
+
+bool increment(size_t value, void *param) {
+    size_t k;
+    memcpy(&k, param, sizeof(size_t));
+    assert_true(value == k);
+    k += 3;
+    memcpy(param, &k, sizeof(size_t));
+    return true;
+}
+
+void test_iterate2() {
+    bitset_t *b = bitset_create();
+    for (int k = 0; k < 1000; ++k) bitset_set(b, 3 * k);
+    assert_true(compute_cardinality(b) == 1000);
+    assert_true(bitset_count(b) == 1000);
+    size_t k = 0;
+    bitset_for_each(b, increment, &k);
+    assert_true(k == 3000);
+    bitset_free(b);
+}
+
+void test_construct() {
+    bitset_t *b = bitset_create();
+    for (int k = 0; k < 1000; ++k) bitset_set(b, 3 * k);
+    assert_true(compute_cardinality(b) == 1000);
+    assert_true(bitset_count(b) == 1000);
+    for (int k = 0; k < 3 * 1000; ++k)
+        assert_true(bitset_get(b, k) == (k / 3 * 3 == k));
+    bitset_free(b);
+}
+
+void test_max_min() {
+    bitset_t *b = bitset_create();
+    for (size_t k = 100; k < 1000; ++k) {
+        bitset_set(b, 3 * k);
+        assert_true(bitset_minimum(b) == 3 * 100);
+        assert_true(bitset_maximum(b) == 3 * k);
+    }
+    bitset_free(b);
+}
+
+void test_shift_left() {
+    for (size_t sh = 0; sh < 256; sh++) {
+        bitset_t *b = bitset_create();
+        int power = 3;
+        size_t s1 = 100;
+        size_t s2 = 5000;
+        for (size_t k = s1; k < s2; ++k) {
+            bitset_set(b, power * k);
+        }
+        size_t mycount = bitset_count(b);
+        assert_true(compute_cardinality(b) == mycount);
+        bitset_shift_left(b, sh);
+        assert_true(bitset_count(b) == mycount);
+        assert_true(compute_cardinality(b) == mycount);
+        for (size_t k = s1; k < s2; ++k) {
+            assert_true(bitset_get(b, power * k + sh));
+        }
+        bitset_free(b);
+    }
+}
+
+void test_set_to_val() {
+    bitset_t *b = bitset_create();
+    bitset_set_to_value(b, 1, true);
+    bitset_set_to_value(b, 1, false);
+    bitset_set_to_value(b, 10, false);
+    bitset_set_to_value(b, 10, true);
+    assert_true(bitset_get(b, 10));
+    assert_true(!bitset_get(b, 1));
+    bitset_free(b);
+}
+
+void test_shift_right() {
+    for (size_t sh = 0; sh < 256; sh++) {
+        bitset_t *b = bitset_create();
+        int power = 3;
+        size_t s1 = 100 + sh;
+        size_t s2 = s1 + 5000;
+        for (size_t k = s1; k < s2; ++k) {
+            bitset_set(b, power * k);
+        }
+        size_t mycount = bitset_count(b);
+        bitset_shift_right(b, sh);
+        assert_true(bitset_count(b) == mycount);
+        for (size_t k = s1; k < s2; ++k) {
+            assert_true(bitset_get(b, power * k - sh));
+        }
+        bitset_free(b);
+    }
+}
+
+void test_union_intersection() {
+    bitset_t *b1 = bitset_create();
+    bitset_t *b2 = bitset_create();
+
+    for (int k = 0; k < 1000; ++k) {
+        bitset_set(b1, 2 * k);
+        bitset_set(b2, 2 * k + 1);
+    }
+    // calling xor twice should leave things unchanged
+    bitset_inplace_symmetric_difference(b1, b2);
+    assert_true(bitset_count(b1) == 2000);
+    bitset_inplace_symmetric_difference(b1, b2);
+    assert_true(bitset_count(b1) == 1000);
+    bitset_inplace_difference(b1, b2);  // should make no difference
+    assert_true(bitset_count(b1) == 1000);
+    bitset_inplace_union(b1, b2);
+    assert_true(bitset_count(b1) == 2000);
+    bitset_inplace_intersection(b1, b2);
+    assert_true(bitset_count(b1) == 1000);
+    bitset_inplace_difference(b1, b2);
+    assert_true(bitset_count(b1) == 0);
+    bitset_inplace_union(b1, b2);
+    bitset_inplace_difference(b2, b1);
+    assert_true(bitset_count(b2) == 0);
+    bitset_free(b1);
+    bitset_free(b2);
+}
+
+void test_counts() {
+    bitset_t *b1 = bitset_create();
+    bitset_t *b2 = bitset_create();
+
+    for (int k = 0; k < 1000; ++k) {
+        bitset_set(b1, 2 * k);
+        bitset_set(b2, 3 * k);
+    }
+    assert_true(bitset_intersection_count(b1, b2) == 334);
+    assert_true(bitset_union_count(b1, b2) == 1666);
+    bitset_free(b1);
+    bitset_free(b2);
+}
+
+/* Creates 2 bitsets, one containing even numbers the other odds.
+Checks bitsets_disjoint() returns that they are disjoint, then sets a common
+bit between both sets and checks that they are no longer disjoint. */
+void test_disjoint() {
+    bitset_t *evens = bitset_create();
+    bitset_t *odds = bitset_create();
+
+    for (int i = 0; i < 1000; i++) {
+        if (i % 2 == 0)
+            bitset_set(evens, i);
+        else
+            bitset_set(odds, i);
+    }
+
+    assert_true(bitsets_disjoint(evens, odds));
+
+    bitset_set(evens, 501);
+    bitset_set(odds, 501);
+
+    assert_true(!bitsets_disjoint(evens, odds));
+
+    bitset_free(evens);
+    bitset_free(odds);
+}
+
+/* Creates 2 bitsets, one containing even numbers the other odds.
+Checks that bitsets_intersect() returns that they do not intersect, then sets
+a common bit and checks that they now intersect. */
+void test_intersects() {
+    bitset_t *evens = bitset_create();
+    bitset_t *odds = bitset_create();
+
+    for (int i = 0; i < 1000; i++) {
+        if (i % 2 == 0)
+            bitset_set(evens, i);
+        else
+            bitset_set(odds, i);
+    }
+
+    assert_true(!bitsets_intersect(evens, odds));
+
+    bitset_set(evens, 1001);
+    bitset_set(odds, 1001);
+
+    assert_true(bitsets_intersect(evens, odds));
+
+    bitset_free(evens);
+    bitset_free(odds);
+}
+/* Create 2 bitsets with different capacity, where the bigger superset
+contains the subset bits plus additional bits after the subset arraysize.
+Checks that bitset_contains_all() returns true when checking if the
+superset contains all the subset bits, and false in the opposite case. */
+void test_contains_all_different_sizes() {
+    const size_t superset_size = 10;
+    const size_t subset_size = 5;
+
+    bitset_t *superset = bitset_create_with_capacity(superset_size);
+    bitset_t *subset = bitset_create_with_capacity(subset_size);
+
+    bitset_set(superset, 1);
+    bitset_set(superset, subset_size - 1);
+    bitset_set(superset, subset_size + 1);
+
+    bitset_set(subset, 1);
+    bitset_set(subset, subset_size - 1);
+
+    assert_true(bitset_contains_all(superset, subset));
+    assert_true(!bitset_contains_all(subset, superset));
+
+    bitset_free(superset);
+    bitset_free(subset);
+}
+
+/* Creates 2 bitsets, one with all bits from 0->1000 set, the other with only
+even bits set in the same range. Checks that the bitset_contains_all()
+returns true, then sets a single bit at 1001 in the prior subset and checks that
+bitset_contains_all() returns false. */
+void test_contains_all() {
+    bitset_t *superset = bitset_create();
+    bitset_t *subset = bitset_create();
+
+    for (int i = 0; i < 1000; i++) {
+        bitset_set(superset, i);
+        if (i % 2 == 0) bitset_set(subset, i);
+    }
+
+    assert_true(bitset_contains_all(superset, subset));
+    assert_true(!bitset_contains_all(subset, superset));
+
+    bitset_set(subset, 1001);
+
+    assert_true(!bitset_contains_all(superset, subset));
+    assert_true(!bitset_contains_all(subset, superset));
+
+    bitset_free(superset);
+    bitset_free(subset);
+}
+
+int main() {
+    test_set_to_val();
+    test_construct();
+    test_union_intersection();
+    test_iterate();
+    test_iterate2();
+    test_max_min();
+    test_counts();
+    test_shift_right();
+    test_shift_left();
+    test_disjoint();
+    test_intersects();
+    test_contains_all();
+    test_contains_all_different_sizes();
+    printf("All asserts passed. Code is probably ok.\n");
+}
diff --git a/tests/container_comparison_unit.c b/tests/container_comparison_unit.c
index 4e8d05165..4f3240ea7 100644
--- a/tests/container_comparison_unit.c
+++ b/tests/container_comparison_unit.c
@@ -44,7 +44,7 @@ static inline void delegated_add(container_t *container, uint8_t typecode,
             break;
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
     }
 }
 
@@ -62,7 +62,7 @@ static inline container_t *container_create(uint8_t typecode) {
             break;
         default:
             assert(false);
-            __builtin_unreachable();
+            roaring_unreachable;
     }
     assert_non_null(result);
     return result;
diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index aab8ae5df..5b3aac343 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -4411,10 +4411,36 @@ DEFINE_TEST(test_portable_deserialize_frozen) {
     free(serialized);
 }
 
+DEFINE_TEST(convert_to_bitset) {
+    roaring_bitmap_t *r1 = roaring_bitmap_create();
+    for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) {
+     roaring_bitmap_add(r1, i);
+    }
+    for (uint32_t i = 100000; i < 500000; i+= 100) {
+     roaring_bitmap_add(r1, i);
+    }
+    roaring_bitmap_add_range(r1, 500000, 600000);
+    bitset_t * bitset = bitset_create();
+    bool success = roaring_bitmap_to_bitset(r1, bitset);
+    assert_true(success); // could fail due to memory allocation.
+    assert_true(bitset_count(bitset) == roaring_bitmap_get_cardinality(r1));
+    // You can then query the bitset:
+    for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) {
+        assert_true(bitset_get(bitset,i));
+    }
+    for (uint32_t i = 100000; i < 500000; i+= 100) {
+        assert_true(bitset_get(bitset,i));
+    }
+    // you must free the memory:
+    bitset_free(bitset);
+    roaring_bitmap_free(r1);
+}
+
 int main() {
     tellmeall();
 
     const struct CMUnitTest tests[] = {
+        cmocka_unit_test(convert_to_bitset),
         cmocka_unit_test(issue440),
         cmocka_unit_test(issue436),
         cmocka_unit_test(issue433),

From eac38a09861bbf5893f3d7b24b53c0a48bfec000 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Sun, 26 Mar 2023 07:27:18 -0400
Subject: [PATCH 097/162] Releasing

---
 CMakeLists.txt                    | 10 +++++-----
 doxygen                           |  2 +-
 include/roaring/roaring_version.h |  8 ++++----
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fba717e4b..8f78b74c5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,11 +16,11 @@ if(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSIO
     message(FATAL_ERROR "${PROJECT_NAME} requires at least apple-clang version 11 to support runtime dispatching.")
 endif()
 set(ROARING_LIB_NAME roaring)
-set(PROJECT_VERSION_MAJOR 0)
-set(PROJECT_VERSION_MINOR 9)
-set(PROJECT_VERSION_PATCH 9)
-set(ROARING_LIB_VERSION "0.9.9" CACHE STRING "Roaring library version")
-set(ROARING_LIB_SOVERSION "7" CACHE STRING "Roaring library soversion")
+set(PROJECT_VERSION_MAJOR 1)
+set(PROJECT_VERSION_MINOR 0)
+set(PROJECT_VERSION_PATCH 0)
+set(ROARING_LIB_VERSION "1.0.0" CACHE STRING "Roaring library version")
+set(ROARING_LIB_SOVERSION "8" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
 if(NOT ROARING_EXCEPTIONS)
diff --git a/doxygen b/doxygen
index c2d59286f..8f7e77eec 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "0.9.9"
+PROJECT_NUMBER         = "1.0.0"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 25a5632d4..1daf016a3 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "0.9.9"
+#define ROARING_VERSION "1.0.0"
 enum { 
-    ROARING_VERSION_MAJOR = 0,
-    ROARING_VERSION_MINOR = 9,
-    ROARING_VERSION_REVISION = 9
+    ROARING_VERSION_MAJOR = 1,
+    ROARING_VERSION_MINOR = 0,
+    ROARING_VERSION_REVISION = 0
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 7bd53cee9cbf877da6c1b24ff4fec290e387d5e3 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 27 Mar 2023 08:00:54 -0400
Subject: [PATCH 098/162] Adding more comments.

---
 src/containers/mixed_union.c | 22 ++++++++++++++++++++++
 src/containers/mixed_xor.c   | 14 +++++++++++++-
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/src/containers/mixed_union.c b/src/containers/mixed_union.c
index 2af172375..5caf3b993 100644
--- a/src/containers/mixed_union.c
+++ b/src/containers/mixed_union.c
@@ -242,6 +242,17 @@ bool array_array_container_lazy_union(
     container_t **dst
 ){
     int totalCardinality = src_1->cardinality + src_2->cardinality;
+    //
+    // We assume that operations involving bitset containers will be faster than
+    // operations involving solely array containers, except maybe when array containers
+    // are small. Indeed, for example, it is cheap to compute the union between an array and
+    // a bitset container, generally more so than between a large array and another array.
+    // So it is advantageous to favour bitset containers during the computation.
+    // Of course, if we convert array containers eagerly to bitset containers, we may later
+    // need to revert the bitset containers to array containers to satisfy the Roaring format requirements,
+    // but such one-time conversions at the end may not be overly expensive. We arrived at this design
+    // based on extensive benchmarking.
+    //
     if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
         *dst = array_container_create_given_capacity(totalCardinality);
         if (*dst != NULL) {
@@ -269,6 +280,17 @@ bool array_array_container_lazy_inplace_union(
 ){
     int totalCardinality = src_1->cardinality + src_2->cardinality;
     *dst = NULL;
+    //
+    // We assume that operations involving bitset containers will be faster than
+    // operations involving solely array containers, except maybe when array containers
+    // are small. Indeed, for example, it is cheap to compute the union between an array and
+    // a bitset container, generally more so than between a large array and another array.
+    // So it is advantageous to favour bitset containers during the computation.
+    // Of course, if we convert array containers eagerly to bitset containers, we may later
+    // need to revert the bitset containers to array containers to satisfy the Roaring format requirements,
+    // but such one-time conversions at the end may not be overly expensive. We arrived at this design
+    // based on extensive benchmarking.
+    //
     if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
         if(src_1->capacity < totalCardinality) {
           *dst = array_container_create_given_capacity(2  * totalCardinality); // be purposefully generous
diff --git a/src/containers/mixed_xor.c b/src/containers/mixed_xor.c
index d9cacc7ab..0d77d61b2 100644
--- a/src/containers/mixed_xor.c
+++ b/src/containers/mixed_xor.c
@@ -230,7 +230,19 @@ bool array_array_container_lazy_xor(
     container_t **dst
 ){
     int totalCardinality = src_1->cardinality + src_2->cardinality;
-    // upper bound, but probably poor estimate for xor
+    //
+    // We assume that operations involving bitset containers will be faster than
+    // operations involving solely array containers, except maybe when array containers
+    // are small. Indeed, for example, it is cheap to compute the exclusive union between an array and
+    // a bitset container, generally more so than between a large array and another array.
+    // So it is advantageous to favour bitset containers during the computation.
+    // Of course, if we convert array containers eagerly to bitset containers, we may later
+    // need to revert the bitset containers to array containers to satisfy the Roaring format requirements,
+    // but such one-time conversions at the end may not be overly expensive. We arrived at this design
+    // based on extensive benchmarking on unions.
+    // For XOR/exclusive union, we simply followed the heuristic used by the unions (see  mixed_union.c).
+    // Further tuning is possible.
+    //
     if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
         *dst = array_container_create_given_capacity(totalCardinality);
         if (*dst != NULL)

From ff08b575b9bda7e76ca87456931bdf0429e60baf Mon Sep 17 00:00:00 2001
From: Chen Tianjie <chentianjie.ctj@alibaba-inc.com>
Date: Wed, 29 Mar 2023 00:33:29 +0800
Subject: [PATCH 099/162] Use fast union instead of ordinary union. (#452)

---
 src/containers/mixed_union.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/containers/mixed_union.c b/src/containers/mixed_union.c
index 5caf3b993..912596e9f 100644
--- a/src/containers/mixed_union.c
+++ b/src/containers/mixed_union.c
@@ -206,7 +206,7 @@ bool array_array_container_inplace_union(
           return false;  // not a bitset
         } else {
           memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t));
-          src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality,
+          src_1->cardinality = (int32_t)fast_union_uint16(src_1->array + src_2->cardinality, src_1->cardinality,
                                   src_2->array, src_2->cardinality, src_1->array);
           return false; // not a bitset
         }
@@ -302,7 +302,7 @@ bool array_array_container_lazy_inplace_union(
           return false;  // not a bitset
         } else {
           memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t));
-          src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality,
+          src_1->cardinality = (int32_t)fast_union_uint16(src_1->array + src_2->cardinality, src_1->cardinality,
                                   src_2->array, src_2->cardinality, src_1->array);
           return false; // not a bitset
         }

From b36a210ddad33af5dc05fe7ad9e0c1ac6f8ee974 Mon Sep 17 00:00:00 2001
From: longqimin <long2008920@163.com>
Date: Wed, 29 Mar 2023 00:35:00 +0800
Subject: [PATCH 100/162] fix potential unterminated CROARING_TARGET_REGION
 (#456)

- bug case: `CROARING_TARGET_AVX512` is unterminated when `defined(__AVX2__) and !defined(__AVX512VBMI2__)`
- fix:
  - terminate CROARING_TARGET_AVX2 with CROARING_UNTARGET_AVX2, and likewise CROARING_TARGET_AVX512 with CROARING_UNTARGET_AVX512.
  - define/undef both members of each CROARING_(UN)TARGET_{region} pair in the same place.
---
 include/roaring/bitset_util.h |  22 ++---
 include/roaring/portability.h |  11 ++-
 src/array_util.c              |  20 ++---
 src/bitset_util.c             |   6 +-
 src/containers/bitset.c       | 148 +++++++++++++++++-----------------
 src/containers/run.c          |   4 +-
 6 files changed, 107 insertions(+), 104 deletions(-)

diff --git a/include/roaring/bitset_util.h b/include/roaring/bitset_util.h
index 650f2566a..02b5bdd3f 100644
--- a/include/roaring/bitset_util.h
+++ b/include/roaring/bitset_util.h
@@ -294,7 +294,7 @@ static inline __m256i popcount256(__m256i v) {
     const __m256i popcnt2 = _mm256_shuffle_epi8(lookupneg, hi);
     return _mm256_sad_epu8(popcnt1, popcnt2);
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 
 CROARING_TARGET_AVX2
 /**
@@ -306,7 +306,7 @@ static inline void CSA(__m256i *h, __m256i *l, __m256i a, __m256i b,
     *h = _mm256_or_si256(_mm256_and_si256(a, b), _mm256_and_si256(u, c));
     *l = _mm256_xor_si256(u, c);
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 
 CROARING_TARGET_AVX2
 /**
@@ -370,7 +370,7 @@ inline static uint64_t avx2_harley_seal_popcount256(const __m256i *data,
            (uint64_t)(_mm256_extract_epi64(total, 2)) +
            (uint64_t)(_mm256_extract_epi64(total, 3));
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 
 #define AVXPOPCNTFNC(opname, avx_intrinsic)                                    \
     static inline uint64_t avx2_harley_seal_popcount256_##opname(              \
@@ -554,27 +554,27 @@ CROARING_UNTARGET_REGION
 
 CROARING_TARGET_AVX2
 AVXPOPCNTFNC(or, _mm256_or_si256)
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 
 CROARING_TARGET_AVX2
 AVXPOPCNTFNC(union, _mm256_or_si256)
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 
 CROARING_TARGET_AVX2
 AVXPOPCNTFNC(and, _mm256_and_si256)
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 
 CROARING_TARGET_AVX2
 AVXPOPCNTFNC(intersection, _mm256_and_si256)
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 
 CROARING_TARGET_AVX2
 AVXPOPCNTFNC (xor, _mm256_xor_si256)
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 
 CROARING_TARGET_AVX2
 AVXPOPCNTFNC(andnot, _mm256_andnot_si256)
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 
 
 #define VPOPCNT_AND_ADD(ptr, i, accu)   \
@@ -622,7 +622,7 @@ static inline uint64_t avx512_vpopcount(const __m512i* data, const uint64_t size
         
     return simd_sum_epu64(total);
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX512
 #endif
 
 #define AVXPOPCNTFNC512(opname, avx_intrinsic)                                 \
@@ -693,7 +693,7 @@ AVXPOPCNTFNC512(and, _mm512_and_si512)
 AVXPOPCNTFNC512(intersection, _mm512_and_si512)
 AVXPOPCNTFNC512(xor, _mm512_xor_si512)
 AVXPOPCNTFNC512(andnot, _mm512_andnot_si512)
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX512
 #endif
 /***
  * END Harley-Seal popcount functions.
diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index e973e6cea..a726b5ef4 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -333,16 +333,19 @@ static inline int roaring_hamming(uint64_t x) {
 #define CROARING_UNTARGET_REGION
 #endif
 
+
 #define CROARING_TARGET_AVX2 CROARING_TARGET_REGION("avx2,bmi,pclmul,lzcnt")
 #define CROARING_TARGET_AVX512 CROARING_TARGET_REGION("bmi2,avx512f,avx512dq,avx512bw,avx512vbmi2,avx512bitalg,avx512vpopcntdq")
+#define CROARING_UNTARGET_AVX2 CROARING_UNTARGET_REGION
+#define CROARING_UNTARGET_AVX512 CROARING_UNTARGET_REGION
 
 #ifdef __AVX2__
 // No need for runtime dispatching.
 // It is unnecessary and harmful to old clang to tag regions.
 #undef CROARING_TARGET_AVX2
 #define CROARING_TARGET_AVX2
-#undef CROARING_UNTARGET_REGION
-#define CROARING_UNTARGET_REGION
+#undef CROARING_UNTARGET_AVX2
+#define CROARING_UNTARGET_AVX2
 #endif
 
 #if defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VBMI2__) && defined(__AVX512BITALG__) && defined(__AVX512VPOPCNTDQ__)
@@ -350,8 +353,8 @@ static inline int roaring_hamming(uint64_t x) {
 // It is unnecessary and harmful to old clang to tag regions.
 #undef CROARING_TARGET_AVX512
 #define CROARING_TARGET_AVX512
-#undef CROARING_UNTARGET_REGION
-#define CROARING_UNTARGET_REGION
+#undef CROARING_UNTARGET_AVX512
+#define CROARING_UNTARGET_AVX512
 #endif
 
 // Allow unaligned memory access
diff --git a/src/array_util.c b/src/array_util.c
index 9600ce1fd..cadb76821 100644
--- a/src/array_util.c
+++ b/src/array_util.c
@@ -536,7 +536,7 @@ int32_t intersect_vector16_inplace(uint16_t *__restrict__ A, size_t s_a,
     }
     return (int32_t)count;
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 
 CROARING_TARGET_AVX2
 int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A,
@@ -608,7 +608,7 @@ int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A,
     }
     return (int32_t)count;
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 
 CROARING_TARGET_AVX2
 /////////
@@ -752,7 +752,7 @@ int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
     }
     return count;
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 #endif  // CROARING_IS_X64
 
 
@@ -1241,7 +1241,7 @@ static inline void sse_merge(const __m128i *vInput1,
     *vecMax = _mm_max_epu16(vecTmp, *vecMax);
     *vecMin = _mm_alignr_epi8(*vecMin, *vecMin, 2);
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 // used by store_unique, generated by simdunion.py
 static uint8_t uniqshuf[] = {
     0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7,  0x8,  0x9,  0xa,  0xb,
@@ -1600,7 +1600,7 @@ static inline int store_unique(__m128i old, __m128i newval, uint16_t *output) {
     _mm_storeu_si128((__m128i *)output, val);
     return numberofnewvalues;
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 
 // working in-place, this function overwrites the repeated values
 // could be avoided?
@@ -1701,7 +1701,7 @@ uint32_t union_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
     }
     return len;
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 
 /**
  * End of the SIMD 16-bit union code
@@ -1730,7 +1730,7 @@ static inline int store_unique_xor(__m128i old, __m128i newval,
     _mm_storeu_si128((__m128i *)output, val);
     return numberofnewvalues;
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 
 // working in-place, this function overwrites the repeated values
 // could be avoided? Warning: assumes len > 0
@@ -1848,7 +1848,7 @@ uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
     }
     return len;
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 /**
  * End of SIMD 16-bit XOR code
  */
@@ -2041,7 +2041,7 @@ static inline bool _avx512_memequals(const void *s1, const void *s2, size_t n) {
 
     return true;
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX512
 #endif // CROARING_COMPILER_SUPPORTS_AVX512
 
 CROARING_TARGET_AVX2
@@ -2084,7 +2084,7 @@ static inline bool _avx2_memequals(const void *s1, const void *s2, size_t n) {
 
     return true;
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 #endif
 
 bool memequals(const void *s1, const void *s2, size_t n) {
diff --git a/src/bitset_util.c b/src/bitset_util.c
index 038728589..331c65620 100644
--- a/src/bitset_util.c
+++ b/src/bitset_util.c
@@ -661,7 +661,7 @@ size_t bitset_extract_setbits_avx512_uint16(const uint64_t *array, size_t length
 
     return out - initout;
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX512
 #endif
 
 CROARING_TARGET_AVX2
@@ -716,7 +716,7 @@ size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
     }
     return out - initout;
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 #endif  // CROARING_IS_X64
 
 size_t bitset_extract_setbits(const uint64_t *words, size_t length,
@@ -820,7 +820,7 @@ size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length,
     }
     return out - initout;
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 #endif
 
 /*
diff --git a/src/containers/bitset.c b/src/containers/bitset.c
index 81fd97707..a20741117 100644
--- a/src/containers/bitset.c
+++ b/src/containers/bitset.c
@@ -411,72 +411,72 @@ int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
 // we duplicate the function because other containers use the "or" term, makes API more consistent
 #if CROARING_COMPILER_SUPPORTS_AVX512
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, or,    |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, or,    |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, union, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, union, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 
 // we duplicate the function because other containers use the "intersection" term, makes API more consistent
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, and,          &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, and,          &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, xor,    ^,  _mm512_xor_si512,    veorq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, xor,    ^,  _mm512_xor_si512,    veorq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 
 // we duplicate the function because other containers use the "or" term, makes API more consistent
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, or,    |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, or,    |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, union, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, union, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 
 // we duplicate the function because other containers use the "intersection" term, makes API more consistent
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, and,          &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, and,          &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, xor,    ^,  _mm512_xor_si512,    veorq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, xor,    ^,  _mm512_xor_si512,    veorq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 
 // we duplicate the function because other containers use the "or" term, makes API more consistent
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, or,    |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, or,    |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, union, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, union, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 
 // we duplicate the function because other containers use the "intersection" term, makes API more consistent
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, and,          &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, and,          &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, xor,    ^,  _mm512_xor_si512,    veorq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, xor,    ^,  _mm512_xor_si512,    veorq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 CROARING_TARGET_AVX512
-AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_AVX512)
+CROARING_UNTARGET_AVX512
 #endif // CROARING_COMPILER_SUPPORTS_AVX512
 
 #ifndef WORDS_IN_AVX2_REG
@@ -572,72 +572,72 @@ CROARING_UNTARGET_REGION
 
 // we duplicate the function because other containers use the "or" term, makes API more consistent
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, or,    |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, or,    |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 
 // we duplicate the function because other containers use the "intersection" term, makes API more consistent
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, and,          &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, and,          &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, xor,    ^,  _mm256_xor_si256,    veorq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, xor,    ^,  _mm256_xor_si256,    veorq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 
 // we duplicate the function because other containers use the "or" term, makes API more consistent
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, or,    |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, or,    |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 
 // we duplicate the function because other containers use the "intersection" term, makes API more consistent
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, and,          &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, and,          &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, xor,    ^,  _mm256_xor_si256,    veorq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, xor,    ^,  _mm256_xor_si256,    veorq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 
 // we duplicate the function because other containers use the "or" term, makes API more consistent
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, or,    |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, or,    |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 
 // we duplicate the function because other containers use the "intersection" term, makes API more consistent
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, and,          &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, and,          &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, xor,    ^,  _mm256_xor_si256,    veorq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, xor,    ^,  _mm256_xor_si256,    veorq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 CROARING_TARGET_AVX2
-AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_REGION)
-CROARING_UNTARGET_REGION
+AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_AVX2)
+CROARING_UNTARGET_AVX2
 
 
 #define SCALAR_BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic,            \
@@ -1072,7 +1072,7 @@ static inline bool _avx512_bitset_container_equals(const bitset_container_t *con
   }
 	return true;
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX512
 #endif // CROARING_COMPILER_SUPPORTS_AVX512
 CROARING_TARGET_AVX2
 ALLOW_UNALIGNED
@@ -1089,7 +1089,7 @@ static inline bool _avx2_bitset_container_equals(const bitset_container_t *conta
   }
 	return true;
 }
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 #endif // CROARING_IS_X64
 
 ALLOW_UNALIGNED
diff --git a/src/containers/run.c b/src/containers/run.c
index 82e385542..dfcab49f0 100644
--- a/src/containers/run.c
+++ b/src/containers/run.c
@@ -873,7 +873,7 @@ static inline int _avx512_run_container_cardinality(const run_container_t *run)
     return sum;
 }
 
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX512
 
 CROARING_TARGET_AVX2
 ALLOW_UNALIGNED
@@ -906,7 +906,7 @@ static inline int _avx2_run_container_cardinality(const run_container_t *run) {
     return sum;
 }
 
-CROARING_UNTARGET_REGION
+CROARING_UNTARGET_AVX2
 
 /* Get the cardinality of `run'. Requires an actual computation. */
 static inline int _scalar_run_container_cardinality(const run_container_t *run) {

From 4bb23e52c789554733819fb131efc30aa8173e57 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 28 Mar 2023 12:38:14 -0400
Subject: [PATCH 101/162] Adding comments.

---
 src/containers/mixed_union.c | 60 ++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/src/containers/mixed_union.c b/src/containers/mixed_union.c
index 912596e9f..00822e166 100644
--- a/src/containers/mixed_union.c
+++ b/src/containers/mixed_union.c
@@ -206,6 +206,36 @@ bool array_array_container_inplace_union(
           return false;  // not a bitset
         } else {
           memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t));
+          /*
+            Next line is safe:
+
+            We just need to focus on the reading and writing performed on array1. In `union_vector16`, both vectorized and scalar code still obey the basic rule: read from two inputs, do the union, and then write the output.
+
+            Let's say the length(cardinality) of input2 is L2:
+            ```
+                |<-  L2  ->|
+            array1: [output--- |input 1---|---]
+            array2: [input 2---]
+            ```
+            Let's define 3 __m128i pointers, `pos1` starts from `input1`, `pos2` starts from `input2`, these 2 point at the next byte to read, `out` starts from `output`, pointing at the next byte to overwrite.
+            ```
+            array1: [output--- |input 1---|---]
+                        ^          ^
+                    out        pos1
+            array2: [input 2---]
+                        ^
+                        pos2
+            ```
+            The union output always contains less or equal number of elements than all inputs added, so we have:
+            ```
+            out <= pos1 + pos2
+            ```
+            therefore:
+            ```
+            out <= pos1 + L2
+            ```
+            which means you will not overwrite data beyond pos1, so the data that has not been read yet is safe, and we don't care about the data already read.
+          */
           src_1->cardinality = (int32_t)fast_union_uint16(src_1->array + src_2->cardinality, src_1->cardinality,
                                   src_2->array, src_2->cardinality, src_1->array);
           return false; // not a bitset
@@ -302,6 +332,36 @@ bool array_array_container_lazy_inplace_union(
           return false;  // not a bitset
         } else {
           memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t));
+          /*
+            Next line is safe:
+
+            We just need to focus on the reading and writing performed on array1. In `union_vector16`, both vectorized and scalar code still obey the basic rule: read from two inputs, do the union, and then write the output.
+
+            Let's say the length(cardinality) of input2 is L2:
+            ```
+                |<-  L2  ->|
+            array1: [output--- |input 1---|---]
+            array2: [input 2---]
+            ```
+            Let's define 3 __m128i pointers, `pos1` starts from `input1`, `pos2` starts from `input2`, these 2 point at the next byte to read, `out` starts from `output`, pointing at the next byte to overwrite.
+            ```
+            array1: [output--- |input 1---|---]
+                        ^          ^
+                    out        pos1
+            array2: [input 2---]
+                        ^
+                        pos2
+            ```
+            The union output never contains more elements than the two inputs combined, so (measuring each position as an element count from its own starting point) we have:
+            ```
+            out <= pos1 + pos2
+            ```
+            therefore:
+            ```
+            out <= pos1 + L2
+            ```
+            which means writes never go beyond pos1: data that has not been read yet is never overwritten, and data that has already been read no longer matters.
+          */
           src_1->cardinality = (int32_t)fast_union_uint16(src_1->array + src_2->cardinality, src_1->cardinality,
                                   src_2->array, src_2->cardinality, src_1->array);
           return false; // not a bitset

From c0f0ded876a84d431c96eea87d0f128673a7ea4e Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 30 Mar 2023 17:32:46 -0400
Subject: [PATCH 102/162] Issue 457 (#458)

---
 src/containers/run.c  |  2 +-
 tests/toplevel_unit.c | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/containers/run.c b/src/containers/run.c
index dfcab49f0..a32e476f0 100644
--- a/src/containers/run.c
+++ b/src/containers/run.c
@@ -131,7 +131,7 @@ void run_container_offset(const run_container_t *c,
         lo_cap = c->n_runs;
         hi_cap = 0;
     } else {
-        split = c->runs[pivot].value <= top;
+        split = c->runs[pivot].value < top;
         lo_cap = pivot + (split ? 1 : 0);
         hi_cap = c->n_runs - pivot;
     }
diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index 5b3aac343..b30d720ee 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -46,6 +46,17 @@ bool roaring_iterator_sumall(uint32_t value, void *param) {
     *(uint32_t *)param += value;
     return true;  // continue till the end
 }
+DEFINE_TEST(issue457) {
+  roaring_bitmap_t *r1 = roaring_bitmap_from_range(65539, 65541, 1);
+  roaring_bitmap_printf_describe(r1);
+  assert_true(roaring_bitmap_get_cardinality(r1) == 2);
+  roaring_bitmap_t *r2 = roaring_bitmap_add_offset(r1, -3);
+  roaring_bitmap_printf_describe(r2);
+  assert_true(roaring_bitmap_get_cardinality(r2) == 2);
+  roaring_bitmap_printf(r2);
+  roaring_bitmap_free(r1);
+  roaring_bitmap_free(r2);
+}
 
 DEFINE_TEST(issue429) {
   // This is a memory leak test, so we don't need to check the results.
@@ -4440,6 +4451,7 @@ int main() {
     tellmeall();
 
     const struct CMUnitTest tests[] = {
+        cmocka_unit_test(issue457),
         cmocka_unit_test(convert_to_bitset),
         cmocka_unit_test(issue440),
         cmocka_unit_test(issue436),

From 227c1a0a0fe578d3941d98cbde355decc1fdcfbe Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Thu, 30 Mar 2023 17:33:37 -0400
Subject: [PATCH 103/162] New release

---
 CMakeLists.txt                    | 4 ++--
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8f78b74c5..56dbd3f2e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 1)
 set(PROJECT_VERSION_MINOR 0)
-set(PROJECT_VERSION_PATCH 0)
-set(ROARING_LIB_VERSION "1.0.0" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 1)
+set(ROARING_LIB_VERSION "1.0.1" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "8" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/doxygen b/doxygen
index 8f7e77eec..5d3a3b1df 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "1.0.0"
+PROJECT_NUMBER         = "1.0.1"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 1daf016a3..037bcb883 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "1.0.0"
+#define ROARING_VERSION "1.0.1"
 enum { 
     ROARING_VERSION_MAJOR = 1,
     ROARING_VERSION_MINOR = 0,
-    ROARING_VERSION_REVISION = 0
+    ROARING_VERSION_REVISION = 1
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 0eafc44b6f83522ab158676dd3553cbcd32e2eb3 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Sat, 1 Apr 2023 19:13:37 -0400
Subject: [PATCH 104/162] More reasonable and accurate benchmarks (#460)

* Ok.

* Adding dir

* Various fixes

* Tweaks.

* Documentation.

* Sensible?

* Minor fix for windows

* Better guarding.

* Tweaking cmocka

* Guarding the benchmarks.
---
 CMakeLists.txt                                |   15 +-
 README.md                                     |   34 +-
 amalgamation.sh                               |    1 -
 cmake/import.cmake                            |   52 +
 include/roaring/bitset_util.h                 |    8 +-
 include/roaring/isadetection.h                |  289 +----
 include/roaring/misc/configreport.h           |   35 +-
 microbenchmarks/CMakeLists.txt                |   16 +
 microbenchmarks/bench.cpp                     |  206 ++++
 microbenchmarks/bench.h                       |  247 ++++
 .../performancecounters/apple_arm_events.h    | 1011 ++++++++++++++++
 .../performancecounters/event_counter.h       |  150 +++
 microbenchmarks/performancecounters/ibireme.h |  917 ++++++++++++++
 .../performancecounters/linux-perf-events.h   |  101 ++
 microbenchmarks/toni_ronnko_dirent.h          | 1075 +++++++++++++++++
 src/CMakeLists.txt                            |   31 +-
 src/array_util.c                              |   14 +-
 src/bitset_util.c                             |   13 +-
 src/containers/array.c                        |   10 +-
 src/containers/bitset.c                       |   56 +-
 src/containers/convert.c                      |    8 +-
 src/containers/run.c                          |   10 +-
 src/isadetection.c                            |  268 ++++
 src/roaring.c                                 |    1 -
 tests/CMakeLists.txt                          |    5 -
 tests/bitset_container_unit.c                 |   10 +-
 tests/cbitset_unit.c                          |    4 +-
 tools/cmake/FindCTargets.cmake                |   14 +-
 tools/cmake/Import.cmake                      |   32 +-
 29 files changed, 4238 insertions(+), 395 deletions(-)
 create mode 100644 cmake/import.cmake
 create mode 100644 microbenchmarks/CMakeLists.txt
 create mode 100644 microbenchmarks/bench.cpp
 create mode 100644 microbenchmarks/bench.h
 create mode 100644 microbenchmarks/performancecounters/apple_arm_events.h
 create mode 100644 microbenchmarks/performancecounters/event_counter.h
 create mode 100644 microbenchmarks/performancecounters/ibireme.h
 create mode 100644 microbenchmarks/performancecounters/linux-perf-events.h
 create mode 100644 microbenchmarks/toni_ronnko_dirent.h
 create mode 100644 src/isadetection.c

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 56dbd3f2e..9b41269f4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -31,7 +31,7 @@ endif()
 option(ROARING_DISABLE_X64 "Forcefully disable x64 optimizations even if hardware supports it (this disables AVX)" OFF)
 option(ROARING_DISABLE_AVX "Forcefully disable AVX even if hardware supports it " OFF)
 option(ROARING_DISABLE_NEON "Forcefully disable NEON even if hardware supports it" OFF)
-option(ROARING_DISABLE_NATIVE "Forcefully disable -march optimizations (obsolete)" OFF)
+option(ROARING_DISABLE_AVX512 "Forcefully disable AVX512 even if compiler supports it" OFF)
 
 option(ROARING_BUILD_STATIC "Build a static library" ON)
 if(BUILD_SHARED_LIBS)
@@ -62,7 +62,7 @@ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/roaring.pc" DESTINATION ${CMAKE_INSTA
 
 add_library(roaring-headers INTERFACE)
 target_include_directories(roaring-headers INTERFACE
-  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include/${ROARING_LIB_NAME}>
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include/roaring>
   $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCDIR}>)
 add_library(roaring-headers-cpp INTERFACE)
 target_include_directories(roaring-headers-cpp INTERFACE
@@ -73,11 +73,11 @@ target_include_directories(roaring-headers-cpp INTERFACE
 ### Some users want the C++ header files to be installed as well.
 ### C++ header files get installed to /usr/local/include/roaring typically
 SET(CPP_ROARING_HEADERS cpp/roaring64map.hh  cpp/roaring.hh) # needs to be updated if we add more files
-install(FILES ${CPP_ROARING_HEADERS} DESTINATION include/${ROARING_LIB_NAME})
-install(DIRECTORY include/${ROARING_LIB_NAME} DESTINATION include)
+install(FILES ${CPP_ROARING_HEADERS} DESTINATION include/roaring)
+install(DIRECTORY include/roaring DESTINATION include)
 
 install(TARGETS roaring-headers roaring-headers-cpp
-   EXPORT ${ROARING_LIB_NAME}-config
+   EXPORT roaring-config
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
@@ -98,7 +98,10 @@ configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/tests/config.h.in"
 
 add_subdirectory(src)
 if(ENABLE_ROARING_TESTS)
-  add_subdirectory(benchmarks)
+  if(CMAKE_SIZEOF_VOID_P EQUAL 8) # we only include the benchmarks on 64-bit systems.
+    add_subdirectory(microbenchmarks)
+    add_subdirectory(benchmarks)
+  endif()
   add_subdirectory(tests)
 endif()
 # Being terse is good, but knowing how the build is configured is important
diff --git a/README.md b/README.md
index cdf19907f..2d91e0668 100644
--- a/README.md
+++ b/README.md
@@ -179,6 +179,32 @@ The C interface is found in the file ``include/roaring/roaring.h``. We have C++
 
 Some users have to deal with large volumes of data. It  may be important for these users to be aware of the `addMany` (C++) `roaring_bitmap_or_many` (C) functions as it is much faster and economical to add values in batches when possible. Furthermore, calling periodically the `runOptimize` (C++) or `roaring_bitmap_run_optimize` (C) functions may help.
 
+
+# Running microbenchmarks
+
+We have microbenchmarks built with Google Benchmark.
+Under Linux or macOS, you may run them as follows:
+
+```
+cmake --build build
+./build/microbenchmarks/bench
+```
+
+By default, the benchmark tool picks one data set (e.g., `CRoaring/benchmarks/realdata/census1881`).
+We have several data sets and you may pick others:
+
+```
+./build/microbenchmarks/bench benchmarks/realdata/wikileaks-noquotes 
+```
+
+You may disable some functionality for the purpose of benchmarking. For example, you could
+benchmark the code without AVX-512 even if both your processor and compiler support it:
+
+```
+cmake -B buildnoavx512 -D ROARING_DISABLE_AVX512=ON && cmake --build buildnoavx512
+./buildnoavx512/microbenchmarks/bench
+```
+
 # Custom memory allocators
 For general users, CRoaring would apply default allocator without extra codes. But global memory hook is also provided for those who want a custom memory allocator. Here is an example:
 ```C
@@ -575,14 +601,6 @@ ctest
 ```
 
 
-To run real-data benchmark
-
-```
-./real_bitmaps_benchmark ../benchmarks/realdata/census1881
-```
-where you must adjust the path "../benchmarks/realdata/census1881" so that it points to one of the directories in the benchmarks/realdata directory.
-
-
 To check that your code abides by the style convention (make sure that ``clang-format`` is installed):
 
 ```
diff --git a/amalgamation.sh b/amalgamation.sh
index 161582e7a..e35e5bd6c 100755
--- a/amalgamation.sh
+++ b/amalgamation.sh
@@ -82,7 +82,6 @@ ALL_PRIVATE_C=$( ( \
         && ( type git >/dev/null 2>&1 ) \
         && ( git ls-files $SCRIPTPATH/src/*.c $SCRIPTPATH/src/**/*c ) \
     ) || ( find $SCRIPTPATH/src -name '*.c' ) )
-
 # Verify up-front that all the files exist
 #
 for i in ${ALL_PUBLIC_H} ${ALL_PUBLIC_HH} ${ALL_PRIVATE_H} ${ALL_PRIVATE_C}; do
diff --git a/cmake/import.cmake b/cmake/import.cmake
new file mode 100644
index 000000000..a9b6ffe5d
--- /dev/null
+++ b/cmake/import.cmake
@@ -0,0 +1,52 @@
+set(dep_root "${PROJECT_SOURCE_DIR}/dependencies/.cache")  # default dependency cache, inside the project source tree
+if(DEFINED ENV{roaring_DEPENDENCY_CACHE_DIR})  # environment variable overrides the default cache location
+  set(dep_root "$ENV{roaring_DEPENDENCY_CACHE_DIR}")
+endif()
+
+function(import_dependency NAME GITHUB_REPO COMMIT)
+  message(STATUS "Importing ${NAME} (${GITHUB_REPO}@${COMMIT})")
+  set(target "${dep_root}/${NAME}")
+
+  # If the folder exists in the cache, then we assume that everything is as
+  # should be and do nothing
+  if(EXISTS "${target}")
+    set("${NAME}_SOURCE_DIR" "${target}" PARENT_SCOPE)
+    return()
+  endif()
+
+  set(zip_url "https://github.com/${GITHUB_REPO}/archive/${COMMIT}.zip")
+  set(archive "${dep_root}/archive.zip")
+  set(dest "${dep_root}/_extract")
+
+  file(DOWNLOAD "${zip_url}" "${archive}")
+  file(MAKE_DIRECTORY "${dest}")
+  execute_process(
+          WORKING_DIRECTORY "${dest}"
+          COMMAND "${CMAKE_COMMAND}" -E tar xf "${archive}")
+  file(REMOVE "${archive}")
+
+  # GitHub archives only ever have one folder component at the root, so this
+  # will always match that single folder
+  file(GLOB dir LIST_DIRECTORIES YES "${dest}/*")
+
+  file(RENAME "${dir}" "${target}")
+
+  set("${NAME}_SOURCE_DIR" "${target}" PARENT_SCOPE)
+endfunction()
+
+# Delegates to the dependency
+macro(add_dependency NAME)
+  if(NOT DEFINED "${NAME}_SOURCE_DIR")
+    message(FATAL_ERROR "Missing ${NAME}_SOURCE_DIR variable")
+  endif()
+
+  add_subdirectory("${${NAME}_SOURCE_DIR}" "${PROJECT_BINARY_DIR}/_deps/${NAME}" EXCLUDE_FROM_ALL)
+endmacro()
+
+function(set_off NAME)
+  set("${NAME}" OFF CACHE INTERNAL "")
+endfunction()
+
+function(set_on NAME)
+  set("${NAME}" ON CACHE INTERNAL "")
+endfunction()
\ No newline at end of file
diff --git a/include/roaring/bitset_util.h b/include/roaring/bitset_util.h
index 02b5bdd3f..6b5207f96 100644
--- a/include/roaring/bitset_util.h
+++ b/include/roaring/bitset_util.h
@@ -6,6 +6,12 @@
 #include <roaring/portability.h>
 #include <roaring/utilasm.h>
 
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+
 #ifdef __cplusplus
 extern "C" { namespace roaring { namespace internal {
 #endif
@@ -631,7 +637,7 @@ CROARING_UNTARGET_AVX512
         __m512i total = _mm512_setzero_si512();                                \
         const uint64_t limit = size - size % 4;                                \
         uint64_t i = 0;                                                        \
-	    for (; i < limit; i += 4) {                                        \
+	    for (; i < limit; i += 4) {                                            \
             __m512i a1 = avx_intrinsic(_mm512_loadu_si512(data1 + i),          \
                                        _mm512_loadu_si512(data2 + i));         \
             total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a1));          \
diff --git a/include/roaring/isadetection.h b/include/roaring/isadetection.h
index 39d1d6621..0e0ef0750 100644
--- a/include/roaring/isadetection.h
+++ b/include/roaring/isadetection.h
@@ -1,291 +1,42 @@
-/* From
-https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
-Highly modified.
-
-Copyright (c) 2016-     Facebook, Inc            (Adam Paszke)
-Copyright (c) 2014-     Facebook, Inc            (Soumith Chintala)
-Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
-Copyright (c) 2012-2014 Deepmind Technologies    (Koray Kavukcuoglu)
-Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
-Copyright (c) 2011-2013 NYU                      (Clement Farabet)
-Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
-Iain Melvin, Jason Weston) Copyright (c) 2006      Idiap Research Institute
-(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
-Samy Bengio, Johnny Mariethoz)
-
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the distribution.
-
-3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
-America and IDIAP Research Institute nor the names of its contributors may be
-   used to endorse or promote products derived from this software without
-   specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-*/
-
 #ifndef ROARING_ISADETECTION_H
 #define ROARING_ISADETECTION_H
+#if defined(__x86_64__) || defined(_M_AMD64) // x64
 
-// isadetection.h does not define any macro (except for ROARING_ISADETECTION_H).
 
-#include <stdint.h>
-#include <stdbool.h>
-#include <stdlib.h>
 
 
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
 #ifdef __has_include
 // We want to make sure that the AVX-512 functions are only built on compilers
 // fully supporting AVX-512.
 #if __has_include(<avx512vbmi2intrin.h>)
 #define CROARING_COMPILER_SUPPORTS_AVX512 1
-#endif
-#endif
+#endif // #if __has_include(<avx512vbmi2intrin.h>)
+#endif // #ifdef __has_include
 
 // Visual Studio 2019 and up support AVX-512
 #ifdef _MSC_VER
 #if _MSC_VER >= 1920
 #define CROARING_COMPILER_SUPPORTS_AVX512 1
-#endif
-#endif
-
-// We need portability.h to be included first, see
-// https://github.com/RoaringBitmap/CRoaring/issues/394
-#include <roaring/portability.h>
-#if CROARING_REGULAR_VISUAL_STUDIO
-#include <intrin.h>
-#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
-#include <cpuid.h>
-#endif // CROARING_REGULAR_VISUAL_STUDIO
-
+#endif // #if _MSC_VER >= 1920
+#endif // #ifdef _MSC_VER
 
-enum croaring_instruction_set {
-  CROARING_DEFAULT = 0x0,
-  CROARING_NEON = 0x1,
-  CROARING_AVX2 = 0x4,
-  CROARING_SSE42 = 0x8,
-  CROARING_PCLMULQDQ = 0x10,
-  CROARING_BMI1 = 0x20,
-  CROARING_BMI2 = 0x40,
-  CROARING_ALTIVEC = 0x80,
-  CROARING_AVX512F = 0x100,
-  CROARING_AVX512DQ = 0x200,
-  CROARING_AVX512BW = 0x400,
-  CROARING_AVX512VBMI2 = 0x800,
-  CROARING_AVX512BITALG = 0x1000,
-  CROARING_AVX512VPOPCNTDQ = 0x2000,
-  CROARING_UNINITIALIZED = 0x8000
-};
-
-static unsigned int CROARING_AVX512_REQUIRED = (CROARING_AVX512F | CROARING_AVX512DQ | CROARING_AVX512BW | CROARING_AVX512VBMI2 | CROARING_AVX512BITALG | CROARING_AVX512VPOPCNTDQ);
-
-
-#if defined(__x86_64__) || defined(_M_AMD64) // x64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#define CROARING_COMPILER_SUPPORTS_AVX512 0
+#endif // #ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#endif // #ifndef CROARING_COMPILER_SUPPORTS_AVX512
 
 
-static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
-                         uint32_t *edx) {
-
-#if CROARING_REGULAR_VISUAL_STUDIO
-  int cpu_info[4];
-  __cpuid(cpu_info, *eax);
-  *eax = cpu_info[0];
-  *ebx = cpu_info[1];
-  *ecx = cpu_info[2];
-  *edx = cpu_info[3];
-#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
-  uint32_t level = *eax;
-  __get_cpuid(level, eax, ebx, ecx, edx);
-#else
-  uint32_t a = *eax, b, c = *ecx, d;
-  __asm__("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
-  *eax = a;
-  *ebx = b;
-  *ecx = c;
-  *edx = d;
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
 #endif
-}
-
-/**
- * This is a relatively expensive function but it will get called at most
- * *once* per compilation units. Normally, the CRoaring library is built
- * as one compilation unit.
- */
-static inline uint32_t dynamic_croaring_detect_supported_architectures() {
-  uint32_t eax, ebx, ecx, edx;
-  uint32_t host_isa = 0x0;
-  // Can be found on Intel ISA Reference for CPUID
-  static uint32_t cpuid_avx2_bit = 1 << 5;      ///< @private Bit 5 of EBX for EAX=0x7
-  static uint32_t cpuid_bmi1_bit = 1 << 3;      ///< @private bit 3 of EBX for EAX=0x7
-  static uint32_t cpuid_bmi2_bit = 1 << 8;      ///< @private bit 8 of EBX for EAX=0x7
-  static uint32_t cpuid_avx512f_bit = 1 << 16;  ///< @private bit 16 of EBX for EAX=0x7
-  static uint32_t cpuid_avx512dq_bit = 1 << 17; ///< @private bit 17 of EBX for EAX=0x7
-  static uint32_t cpuid_avx512bw_bit = 1 << 30; ///< @private bit 30 of EBX for EAX=0x7
-  static uint32_t cpuid_avx512vbmi2_bit = 1 << 6; ///< @private bit 6 of ECX for EAX=0x7
-  static uint32_t cpuid_avx512bitalg_bit = 1 << 12; ///< @private bit 12 of ECX for EAX=0x7
-  static uint32_t cpuid_avx512vpopcntdq_bit = 1 << 14; ///< @private bit 14 of ECX for EAX=0x7
-  static uint32_t cpuid_sse42_bit = 1 << 20;    ///< @private bit 20 of ECX for EAX=0x1
-  static uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit  1 of ECX for EAX=0x1
-  // ECX for EAX=0x7
-  eax = 0x7;
-  ecx = 0x0;
-  cpuid(&eax, &ebx, &ecx, &edx);
-  if (ebx & cpuid_avx2_bit) {
-    host_isa |= CROARING_AVX2;
-  }
-  if (ebx & cpuid_bmi1_bit) {
-    host_isa |= CROARING_BMI1;
-  }
-
-  if (ebx & cpuid_bmi2_bit) {
-    host_isa |= CROARING_BMI2;
-  }
-  
-  if (ebx & cpuid_avx512f_bit) {
-    host_isa |= CROARING_AVX512F;
-  }
-  
-  if (ebx & cpuid_avx512bw_bit) {
-    host_isa |= CROARING_AVX512BW;
-  }
-  
-  if (ebx & cpuid_avx512dq_bit) {
-    host_isa |= CROARING_AVX512DQ;
-  }
-  
-  if (ecx & cpuid_avx512vbmi2_bit) {
-    host_isa |= CROARING_AVX512VBMI2;
-  }
-  
-  if (ecx & cpuid_avx512bitalg_bit) {
-    host_isa |= CROARING_AVX512BITALG;
-  }
-  
-  if (ecx & cpuid_avx512vpopcntdq_bit) {
-    host_isa |= CROARING_AVX512VPOPCNTDQ;
-  }
-  
-  // EBX for EAX=0x1
-  eax = 0x1;
-  cpuid(&eax, &ebx, &ecx, &edx);
-
-  if (ecx & cpuid_sse42_bit) {
-    host_isa |= CROARING_SSE42;
-  }
-
-  if (ecx & cpuid_pclmulqdq_bit) {
-    host_isa |= CROARING_PCLMULQDQ;
-  }
-
-  return host_isa;
-}
-
-#endif // end SIMD extension detection code
-
-
-#if defined(__x86_64__) || defined(_M_AMD64) // x64
-
-#if defined(__cplusplus)
-static inline uint32_t croaring_detect_supported_architectures() {
-    // thread-safe as per the C++11 standard.
-    static uint32_t buffer = dynamic_croaring_detect_supported_architectures();
-    return buffer;
-}
-#elif CROARING_VISUAL_STUDIO
-// Visual Studio does not support C11 atomics.
-static inline uint32_t croaring_detect_supported_architectures() {
-    static int buffer = CROARING_UNINITIALIZED;
-    if (buffer == CROARING_UNINITIALIZED) {
-      buffer = dynamic_croaring_detect_supported_architectures();
-    }
-    return buffer;
-}
-#else // CROARING_VISUAL_STUDIO
-#include <stdatomic.h>
-static inline uint32_t croaring_detect_supported_architectures() {
-    // we use an atomic for thread safety
-    static _Atomic uint32_t buffer = CROARING_UNINITIALIZED;
-    if (buffer == CROARING_UNINITIALIZED) {
-      // atomicity is sufficient
-      buffer = dynamic_croaring_detect_supported_architectures();
-    }
-    return buffer;
-}
-#endif // CROARING_REGULAR_VISUAL_STUDIO
-
-#ifdef ROARING_DISABLE_AVX
-static inline bool croaring_avx2() {
-  return false;
-}
-static inline bool croaring_avx512() {
-  return false;
-}
-#elif defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VBMI2__) && defined(__AVX512BITALG__) && defined(__AVX512VPOPCNTDQ__)
-static inline bool croaring_avx2() {
-  return true;
-}
-static inline bool croaring_avx512() {
-  return true;
-}
-#elif defined(__AVX2__)
-static inline bool croaring_avx2() {
-  return true;
-}
-static inline bool croaring_avx512() {
-#if CROARING_COMPILER_SUPPORTS_AVX512
-  // Even though we have set __AVX2__ at compile-time, it is still possible for the hardware
-  // to support AVX-512. By setting __AVX2__, all we are saying is that croaring_avx2() must be true!
-  static bool avx512_support = false;
-
-  if( !avx512_support )
-  {
-      avx512_support = ( (croaring_detect_supported_architectures() & CROARING_AVX512_REQUIRED)
-	                        == CROARING_AVX512_REQUIRED);
-  }
-  return avx512_support;
-#else
-  return false;
-#endif
-}
-#else
-static inline bool croaring_avx2() {
-  return  (croaring_detect_supported_architectures() & CROARING_AVX2) == CROARING_AVX2;
-}
-static inline bool croaring_avx512() {
-#if CROARING_COMPILER_SUPPORTS_AVX512
-  static bool avx512_support = false;
-
-  if( !avx512_support )
-  {
-      avx512_support = ( (croaring_detect_supported_architectures() & CROARING_AVX512_REQUIRED)
-	                        == CROARING_AVX512_REQUIRED);
-  }
-  return avx512_support;
-#else
-  return false;
-#endif
-}
+enum {
+  ROARING_SUPPORTS_AVX2 = 1,    // bit flag, tested as croaring_hardware_support() & ROARING_SUPPORTS_AVX2
+  ROARING_SUPPORTS_AVX512 = 2,  // bit flag, tested as croaring_hardware_support() & ROARING_SUPPORTS_AVX512
+};
+int croaring_hardware_support(void);  // (void) makes this a real prototype in C; empty parentheses leave the parameters unspecified
+#ifdef __cplusplus
+} } }  // extern "C" { namespace roaring { namespace internal {
 #endif
-
-#endif // defined(__x86_64__) || defined(_M_AMD64) // x64
-
+#endif // x64
 #endif // ROARING_ISADETECTION_H
diff --git a/include/roaring/misc/configreport.h b/include/roaring/misc/configreport.h
index 01974a77b..87a6aae8a 100644
--- a/include/roaring/misc/configreport.h
+++ b/include/roaring/misc/configreport.h
@@ -1,7 +1,7 @@
 /*
  * configreport.h
  * If this gets compiled into a different execution unit than the CRoaring library,
- * the functions croaring_avx512() and croaring_avx2() *may* trigger an additional
+ * the expressions croaring_hardware_support() & ROARING_SUPPORTS_AVX512 and croaring_hardware_support() & ROARING_SUPPORTS_AVX2 *may* trigger an additional
  * call to dynamic_croaring_detect_supported_architectures().
  */
 #ifndef INCLUDE_MISC_CONFIGREPORT_H_
@@ -172,40 +172,11 @@ static inline void tellmeall() {
 #ifdef __VERSION__
     printf(" compiler version: %s\t", __VERSION__);
 #endif
-    uint32_t config =  croaring_detect_supported_architectures();
-    if((config & CROARING_NEON) == CROARING_NEON) {
-        printf(" NEON detected\t");
-    }
+
  #ifdef __AVX2__
     printf(" Building for AVX2\t");
  #endif
-    if(croaring_avx512()) {
-        printf( "AVX-512\t");
-    }
-    if(croaring_avx2()) {
-        printf( "AVX2\t");
-    }
-    if((config & CROARING_AVX2) == CROARING_AVX2) {
-        printf( "AVX2 detected\t");
-       if(!croaring_avx2()) {
-         printf( "AVX2 not used\t");
-       }
-    }
-    if((config & CROARING_AVX512_REQUIRED) == CROARING_AVX512_REQUIRED) {
-        printf( "AVX-512 detected\t");
-       if(!croaring_avx2()) {
-         printf( "AVX-512 not used\t");
-       }
-    }
-    if((config & CROARING_SSE42) == CROARING_SSE42) {
-        printf(" SSE4.2 detected\t");
-    }
-    if((config & CROARING_BMI1) == CROARING_BMI1) {
-        printf(" BMI1 detected\t");
-    }
-    if((config & CROARING_BMI2) == CROARING_BMI2) {
-        printf(" BMI2 detected\t");
-    }
+
     printf("\n");
     if ((sizeof(int) != 4) || (sizeof(long) != 8)) {
         printf("number of bytes: int = %lu long = %lu \n",
diff --git a/microbenchmarks/CMakeLists.txt b/microbenchmarks/CMakeLists.txt
new file mode 100644
index 000000000..ca8862df5
--- /dev/null
+++ b/microbenchmarks/CMakeLists.txt
@@ -0,0 +1,16 @@
+
+set (BENCHMARK_DATA_DIR "${PROJECT_SOURCE_DIR}/benchmarks/realdata/")
+
+include(${PROJECT_SOURCE_DIR}/tools/cmake/Import.cmake)
+
+set_off(BENCHMARK_ENABLE_TESTING)
+set_off(BENCHMARK_ENABLE_INSTALL)
+set_off(BENCHMARK_ENABLE_WERROR)
+
+import_dependency(google_benchmarks google/benchmark f91b6b4)
+add_dependency(google_benchmarks)
+
+add_executable(bench bench.cpp)
+target_link_libraries(bench PRIVATE roaring)
+target_link_libraries(bench PRIVATE benchmark::benchmark)
+target_compile_definitions(bench PRIVATE BENCHMARK_DATA_DIR="${BENCHMARK_DATA_DIR}")
diff --git a/microbenchmarks/bench.cpp b/microbenchmarks/bench.cpp
new file mode 100644
index 000000000..1616e5b3d
--- /dev/null
+++ b/microbenchmarks/bench.cpp
@@ -0,0 +1,206 @@
+#include "bench.h"
+
+
+// Materializes the intersection of each adjacent pair of loaded bitmaps.
+struct successive_intersection {
+    static uint64_t run() {
+        uint64_t total = 0;
+        for (size_t next = 1; next < count; ++next) {
+            roaring_bitmap_t *both = roaring_bitmap_and(bitmaps[next - 1], bitmaps[next]);
+            total += roaring_bitmap_get_cardinality(both);
+            roaring_bitmap_free(both);
+        }
+        return total;
+    }
+};
+auto SuccessiveIntersection = BasicBench<successive_intersection>;
+BENCHMARK(SuccessiveIntersection);
+
+
+// Sums roaring_bitmap_and_cardinality over each adjacent pair of bitmaps.
+struct successive_intersection_cardinality {
+    static uint64_t run() {
+        uint64_t total = 0;
+        for (size_t next = 1; next < count; ++next)
+            total += roaring_bitmap_and_cardinality(bitmaps[next - 1], bitmaps[next]);
+        return total;
+    }
+};
+auto SuccessiveIntersectionCardinality = BasicBench<successive_intersection_cardinality>;
+BENCHMARK(SuccessiveIntersectionCardinality);
+
+
+// Sums roaring_bitmap_or_cardinality over each adjacent pair of bitmaps.
+struct successive_union_cardinality {
+    static uint64_t run() {
+        uint64_t total = 0;
+        for (size_t next = 1; next < count; ++next)
+            total += roaring_bitmap_or_cardinality(bitmaps[next - 1], bitmaps[next]);
+        return total;
+    }
+};
+auto SuccessiveUnionCardinality = BasicBench<successive_union_cardinality>;
+BENCHMARK(SuccessiveUnionCardinality);
+
+// Sums roaring_bitmap_andnot_cardinality over each adjacent pair of bitmaps.
+struct successive_difference_cardinality {
+    static uint64_t run() {
+        uint64_t total = 0;
+        for (size_t next = 1; next < count; ++next)
+            total += roaring_bitmap_andnot_cardinality(bitmaps[next - 1], bitmaps[next]);
+        return total;
+    }
+};
+auto SuccessiveDifferenceCardinality = BasicBench<successive_difference_cardinality>;
+BENCHMARK(SuccessiveDifferenceCardinality);
+
+// Materializes the union of each adjacent pair of loaded bitmaps.
+struct successive_union {
+    static uint64_t run() {
+        uint64_t total = 0;
+        for (size_t next = 1; next < count; ++next) {
+            roaring_bitmap_t *either = roaring_bitmap_or(bitmaps[next - 1], bitmaps[next]);
+            total += roaring_bitmap_get_cardinality(either);
+            roaring_bitmap_free(either);
+        }
+        return total;
+    }
+};
+auto SuccessiveUnion = BasicBench<successive_union>;
+BENCHMARK(SuccessiveUnion);
+
+// Unions every loaded bitmap with a single roaring_bitmap_or_many call.
+struct many_union {
+    static uint64_t run() {
+        const roaring_bitmap_t **inputs = (const roaring_bitmap_t **)bitmaps;
+        roaring_bitmap_t *merged = roaring_bitmap_or_many(count, inputs);
+        const uint64_t cardinality = roaring_bitmap_get_cardinality(merged);
+        roaring_bitmap_free(merged);
+        return cardinality;
+    }
+};
+auto TotalUnion = BasicBench<many_union>;
+BENCHMARK(TotalUnion);
+
+// Unions every loaded bitmap with a single roaring_bitmap_or_many_heap call.
+struct many_union_heap {
+    static uint64_t run() {
+        const roaring_bitmap_t **inputs = (const roaring_bitmap_t **)bitmaps;
+        roaring_bitmap_t *merged = roaring_bitmap_or_many_heap(count, inputs);
+        const uint64_t cardinality = roaring_bitmap_get_cardinality(merged);
+        roaring_bitmap_free(merged);
+        return cardinality;
+    }
+};
+auto TotalUnionHeap = BasicBench<many_union_heap>;
+BENCHMARK(TotalUnionHeap);
+
+struct random_access {  // probes every bitmap at 1/4, 1/2 and 3/4 of maxvalue
+    static uint64_t run() {
+        uint64_t marker = 0;
+        for (size_t i = 0; i < count; ++i) {  // 3ULL below: 64-bit product, no uint32_t wrap
+            marker += roaring_bitmap_contains(bitmaps[i], maxvalue / 4);
+            marker += roaring_bitmap_contains(bitmaps[i], maxvalue / 2);
+            marker += roaring_bitmap_contains(bitmaps[i], (uint32_t)(3ULL * maxvalue / 4));
+        }
+        return marker;
+    }
+};
+auto RandomAccess = BasicBench<random_access>;
+BENCHMARK(RandomAccess);
+
+struct to_array {  // exports each bitmap into the shared array_buffer
+    static uint64_t run() {
+        uint64_t checksum = 0;
+        for (roaring_bitmap_t **cur = bitmaps; cur != bitmaps + count; ++cur) {
+            roaring_bitmap_to_uint32_array(*cur, array_buffer);
+            checksum += *array_buffer;  // fold in the first exported value
+        }
+        return checksum;
+    }
+};
+auto ToArray = BasicBench<to_array>;
+BENCHMARK(ToArray);
+
+// Visits every value of every bitmap through a uint32 iterator.
+struct iterate_all {
+    static uint64_t run() {
+        uint64_t visited = 0;
+        for (size_t idx = 0; idx < count; ++idx) {
+            roaring_uint32_iterator_t it;
+            roaring_init_iterator(bitmaps[idx], &it);
+            // One step per visited value, until has_value turns false.
+            for (; it.has_value; roaring_advance_uint32_iterator(&it)) {
+                ++visited;
+            }
+        }
+        return visited;
+    }
+};
+auto IterateAll = BasicBench<iterate_all>;
+BENCHMARK(IterateAll);
+
+
+// Sums the cardinality of every loaded bitmap.
+struct compute_cardinality {
+    static uint64_t run() {
+        uint64_t total = 0;
+        for (size_t idx = 0; idx < count; ++idx)
+            total += roaring_bitmap_get_cardinality(bitmaps[idx]);
+        return total;
+    }
+};
+
+auto ComputeCardinality = BasicBench<compute_cardinality>;
+BENCHMARK(ComputeCardinality)->MinTime(2);
+
+// Loads the data set, records context for the report, runs the benchmarks.
+int main(int argc, char **argv) {
+    const char *dir_name;
+    if ((argc < 2) || (argv[1][0] == '-')) {  // no directory, or only flags
+        benchmark::AddCustomContext(
+            "benchmarking other files",
+            "You may pass in a data directory as a parameter.");
+        dir_name = BENCHMARK_DATA_DIR "census1881";
+    } else {
+        dir_name = argv[1];
+    }
+    int number_loaded = load(dir_name);
+#if (__APPLE__ && __aarch64__) || defined(__linux__)
+    if (!collector.has_events()) {
+        benchmark::AddCustomContext("performance counters",
+                                    "No privileged access (sudo may help).");
+    }
+#else
+    if (!collector.has_events()) {
+        benchmark::AddCustomContext("performance counters",
+                                    "Unsupported system.");
+    }
+#endif
+
+#if CROARING_IS_X64
+    benchmark::AddCustomContext("x64", "detected");
+    int support = roaring::internal::croaring_hardware_support();
+#if CROARING_COMPILER_SUPPORTS_AVX512
+    benchmark::AddCustomContext("AVX-512", "supported by compiler");
+    benchmark::AddCustomContext("AVX-512 hardware", ( support & roaring::internal::ROARING_SUPPORTS_AVX512 ) ? "yes" : "no");
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+    benchmark::AddCustomContext("AVX-2 hardware", ( support & roaring::internal::ROARING_SUPPORTS_AVX2 ) ? "yes" : "no");
+#endif // CROARING_IS_X64
+    benchmark::AddCustomContext("data source", dir_name);
+    benchmark::AddCustomContext("number of bitmaps", std::to_string(count));
+    benchmark::AddCustomContext(
+        "In RAM volume in MiB (estimated)",
+        std::to_string(bitmap_examples_bytes / (1024 * 1024.0)));
+    if (number_loaded == -1) {
+        return EXIT_FAILURE;
+    }
+    benchmark::Initialize(&argc, argv);
+    benchmark::RunSpecifiedBenchmarks();
+    benchmark::Shutdown();
+    for (size_t i = 0; i < count; ++i) {
+        roaring_bitmap_free(bitmaps[i]);
+    }
+    free(bitmaps);  // the pointer table allocated by create_all_bitmaps
+    free(array_buffer);
+}
\ No newline at end of file
diff --git a/microbenchmarks/bench.h b/microbenchmarks/bench.h
new file mode 100644
index 000000000..7a8d0662c
--- /dev/null
+++ b/microbenchmarks/bench.h
@@ -0,0 +1,247 @@
+#ifndef CROARING_MICROBENCHMARKS_BENCH_H
+#define CROARING_MICROBENCHMARKS_BENCH_H
+// clang-format off
+#include <atomic>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <sstream>
+
+#if (!defined(_WIN32) && !defined(_WIN64) && !(__MINGW32__) && !(__MINGW64__))
+#include <dirent.h>
+#else
+#include "toni_ronnko_dirent.h"
+#endif
+#include <benchmark/benchmark.h>
+#include <roaring/roaring.h>
+
+#include "performancecounters/event_counter.h"
+// clang-format on
+
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+
+event_collector collector;  // performance-counter sampler used by BasicBench
+size_t N = 1000;  // number of instrumented repetitions in BasicBench
+size_t bitmap_examples_bytes = 0;  // summed roaring_bitmap_size_in_bytes of the bitmaps
+size_t count = 0;  // number of entries in `bitmaps`, set by load()
+roaring_bitmap_t **bitmaps = NULL;  // bitmaps built by create_all_bitmaps
+uint32_t * array_buffer;  // scratch array of maxcard values, see create_all_bitmaps
+uint32_t maxvalue = 0;  // largest last element among the input arrays
+uint32_t maxcard = 0;  // largest input array length
+
+/**
+ * Read the whole content of a file into a freshly malloc'ed, NUL-terminated
+ * char array. The caller is responsible for releasing it with free().
+ * Returns NULL on error (file cannot be opened, sized, allocated or read);
+ * an empty file is also reported as an error.
+ *
+ * (Intended for small files: everything is loaded in one go.)
+ */
+static char *read_file(const char *filename) {
+    FILE *fp = fopen(filename, "r");
+    if (!fp) {
+        printf("Could not open file %s\n", filename);
+        return NULL;
+    }
+
+    // Size the file by seeking to its end; ftell() returns -1 on failure.
+    fseek(fp, 0, SEEK_END);
+    const long length = ftell(fp);
+    rewind(fp);
+
+    char *answer = (length < 0) ? NULL : (char *)malloc((size_t)length + 1);
+    // Short-circuits when the allocation failed, so fread never sees NULL.
+    const bool loaded = answer && fread(answer, (size_t)length, 1, fp) == 1;
+    fclose(fp);  // the stream is released on every path, success or not
+    if (!loaded) {
+        free(answer);  // free(NULL) is a no-op
+        return NULL;
+    }
+    answer[length] = '\0';
+    return answer;
+}
+
+/**
+ * Parse a file of comma-separated unsigned integers into a malloc'ed array.
+ * On success *howmany receives the number of integers parsed and the caller
+ * must free() the result; on failure NULL is returned and *howmany is 0.
+ */
+static uint32_t *read_integer_file(const char *filename, size_t *howmany) {
+    *howmany = 0;  // keep the count defined on every error path
+    char *buffer = read_file(filename);
+    if (buffer == NULL) return NULL;
+    size_t howmanyints = 1;  // one more integer than there are commas
+    size_t i1 = 0;
+    for (; buffer[i1] != '\0'; i1++) {
+        if (buffer[i1] == ',') ++howmanyints;
+    }
+
+    uint32_t *answer = (uint32_t *)malloc(howmanyints * sizeof(uint32_t));
+    if (answer == NULL) { free(buffer); return NULL; }  // do not leak the text
+    size_t pos = 0;
+    // pos < howmanyints: stop before overrunning answer on oddly separated input.
+    for (size_t i = 0; (pos < howmanyints) && (i < i1) && (buffer[i] != '\0'); i++) {
+        uint32_t currentint;
+        while ((buffer[i] < '0') || (buffer[i] > '9')) {
+            i++;
+            if (buffer[i] == '\0') goto END;
+        }
+        currentint = (uint32_t)(buffer[i] - '0');
+        i++;
+        for (; (buffer[i] >= '0') && (buffer[i] <= '9'); i++)
+            currentint = currentint * 10 + (uint32_t)(buffer[i] - '0');
+        answer[pos++] = currentint;
+    }
+END:
+    if (pos != howmanyints) {
+        printf("unexpected number of integers! %d %d \n", (int)pos, (int)howmanyints);
+    }
+    *howmany = pos;
+    free(buffer);
+    return answer;
+}
+
+/**
+ * Tell whether the last dot-suffix of filename equals the given extension.
+ */
+static bool has_extension(const char *filename, const char *extension) {
+    const char *last_dot = strrchr(filename, '.');
+    return (last_dot != NULL) && (strcmp(last_dot, extension) == 0);
+}
+
+/**
+ * Read every file of dirname whose name has the given extension (in
+ * alphasort order). *tcount receives the number of such files and *howmany a
+ * malloc'ed array holding the integer count of each one. Returns NULL on
+ * error or when no file matches; the caller frees everything returned.
+ */
+static uint32_t **read_all_integer_files(const char *dirname,
+                                         const char *extension,
+                                         size_t **howmany, size_t *tcount) {
+    struct dirent **entry_list;
+
+    int c = scandir(dirname, &entry_list, 0, alphasort);
+    if (c < 0) return NULL;
+    size_t truec = 0;
+    for (int i = 0; i < c; i++) {
+        if (has_extension(entry_list[i]->d_name, extension)) ++truec;
+    }
+    *tcount = truec;
+    // Nothing to load: keep both results NULL. A zero-byte malloc may return
+    // a non-NULL pointer, which would hide the failure from the caller.
+    *howmany = NULL;
+    uint32_t **answer = NULL;
+    if (truec > 0) {
+        *howmany = (size_t *)malloc(sizeof(size_t) * truec);
+        answer = (uint32_t **)malloc(sizeof(uint32_t *) * truec);
+    }
+    const size_t dirlen = strlen(dirname);
+    // Insert a separator only when dirname does not already end with one.
+    const char *sep = (dirlen > 0 && dirname[dirlen - 1] == '/') ? "" : "/";
+    for (size_t i = 0, pos = 0; i < (size_t)c; i++) {
+        const char *fname = entry_list[i]->d_name;
+        if (!has_extension(fname, extension)) continue;
+        const size_t pathlen = dirlen + strlen(sep) + strlen(fname) + 1;
+        char *fullpath = (char *)malloc(pathlen);
+        snprintf(fullpath, pathlen, "%s%s%s", dirname, sep, fname);
+        answer[pos] = read_integer_file(fullpath, &((*howmany)[pos]));
+        // An unreadable file contributes an empty entry with a defined count.
+        if (answer[pos] == NULL) (*howmany)[pos] = 0;
+        pos++;
+        free(fullpath);
+    }
+    for (int i = 0; i < c; ++i) free(entry_list[i]);
+    free(entry_list);
+    return answer;
+}
+/**
+ * Build one bitmap per input array once all integers have been collected.
+ * Also refreshes the globals maxvalue, maxcard, bitmap_examples_bytes and
+ * array_buffer. Returns NULL when numbers is NULL or allocation fails.
+ */
+static roaring_bitmap_t **create_all_bitmaps(size_t *howmany,
+                                             uint32_t **numbers, size_t tcount,
+                                             bool runoptimize,
+                                             bool copy_on_write) {
+    if (numbers == NULL) return NULL;  // must precede every dereference below
+    roaring_bitmap_t **answer =
+        (roaring_bitmap_t **)malloc(sizeof(roaring_bitmap_t *) * tcount);
+    if (answer == NULL) return NULL;
+    bitmap_examples_bytes = 0;
+    for (size_t i = 0; i < tcount; i++) {
+        // An array that failed to load is treated as empty instead of being read.
+        const size_t n = (numbers[i] != NULL) ? howmany[i] : 0;
+        // NOTE(review): the last entry is taken as the maximum (sorted input).
+        if (n > 0 && maxvalue < numbers[i][n - 1]) maxvalue = numbers[i][n - 1];
+        if (maxcard < n) maxcard = (uint32_t)n;
+        answer[i] = roaring_bitmap_of_ptr(n, numbers[i]);
+        if (runoptimize) roaring_bitmap_run_optimize(answer[i]);
+        roaring_bitmap_shrink_to_fit(answer[i]);
+        bitmap_examples_bytes += roaring_bitmap_size_in_bytes(answer[i]);
+        roaring_bitmap_set_copy_on_write(answer[i], copy_on_write);
+    }
+    array_buffer = (uint32_t*) malloc(maxcard * sizeof(uint32_t));
+    return answer;
+}
+
+template <class func>  // func must provide a static run() whose result fits marker
+static void BasicBench(benchmark::State &state) {
+    // volatile so that the result of func::run() cannot be optimized away.
+    volatile uint64_t marker = 0;
+    for (auto _ : state) {
+        marker = func::run();
+    }
+    if (collector.has_events()) {  // extra pass: N runs sampled by the collector
+        event_aggregate aggregate{};
+        for (size_t i = 0; i < N; i++) {
+            std::atomic_thread_fence(std::memory_order_acquire);
+            collector.start();
+            marker = func::run();
+            std::atomic_thread_fence(std::memory_order_release);
+            event_count allocate_count = collector.end();
+            aggregate << allocate_count;
+        }
+        state.counters["cycles"] = aggregate.best.cycles();
+
+        state.counters["instructions"] =  aggregate.best.instructions();
+        state.counters["GHz"] =
+            aggregate.best.cycles() / aggregate.best.elapsed_ns();  // cycles per ns
+    }
+    (void)marker;
+}
+
+
+// Loads every ".txt" file of dirname into the global `bitmaps`/`count`.
+// Returns the number of bitmaps, or -1 when nothing could be loaded.
+int load(const char *dirname) {
+    const char *extension = ".txt";
+    bool copy_on_write = false;
+    bool runoptimize = true;
+    size_t *howmany;
+    uint32_t **numbers =
+        read_all_integer_files(dirname, extension, &howmany, &count);
+    if (numbers == NULL) {
+        printf(
+            "I could not find or load any data file with extension %s in "
+            "directory %s.\n",
+            extension, dirname);
+        return -1;
+    }
+    bitmaps =
+        create_all_bitmaps(howmany, numbers, count, runoptimize, copy_on_write);
+    // The input arrays are no longer needed once the bitmaps exist.
+    for (size_t i = 0; i < count; ++i) free(numbers[i]);
+    free(numbers);  // the pointer table itself was previously leaked
+    free(howmany);
+    if (bitmaps == NULL) return -1;
+    return (int)count;
+}
+
+#endif
\ No newline at end of file
diff --git a/microbenchmarks/performancecounters/apple_arm_events.h b/microbenchmarks/performancecounters/apple_arm_events.h
new file mode 100644
index 000000000..5ce147ee2
--- /dev/null
+++ b/microbenchmarks/performancecounters/apple_arm_events.h
@@ -0,0 +1,1011 @@
+
+// Original design from:
+// =============================================================================
+// XNU kperf/kpc
+// Available for 64-bit Intel/Apple Silicon, macOS/iOS, with root privileges
+//
+// References:
+//
+// XNU source (since xnu 2422.1.72):
+// https://github.com/apple/darwin-xnu/blob/main/osfmk/kern/kpc.h
+// https://github.com/apple/darwin-xnu/blob/main/bsd/kern/kern_kpc.c
+//
+// Lightweight PET (Profile Every Thread, since xnu 3789.1.32):
+// https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/pet.c
+// https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/kperf_kpc.c
+//
+// System Private frameworks (since macOS 10.11, iOS 8.0):
+// /System/Library/PrivateFrameworks/kperf.framework
+// /System/Library/PrivateFrameworks/kperfdata.framework
+//
+// Xcode framework (since Xcode 7.0):
+// /Applications/Xcode.app/Contents/SharedFrameworks/DVTInstrumentsFoundation.framework
+//
+// CPU database (plist files)
+// macOS (since macOS 10.11):
+//     /usr/share/kpep/<name>.plist
+// iOS (copied from Xcode, since iOS 10.0, Xcode 8.0):
+//     /Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform
+//     /DeviceSupport/<version>/DeveloperDiskImage.dmg/usr/share/kpep/<name>.plist
+//
+//
+// Created by YaoYuan <ibireme@gmail.com> on 2021.
+// Released into the public domain (unlicense.org).
+// =============================================================================
+
+#ifndef M1CYCLES_H
+#define M1CYCLES_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <dlfcn.h>           // for dlopen() and dlsym()
+#include <mach/mach_time.h>  // for mach_absolute_time()
+#include <sys/kdebug.h>      // for kdebug trace decode
+#include <sys/sysctl.h>      // for sysctl()
+#include <unistd.h>          // for usleep()
+
+/// Four performance-counter readings kept as doubles, with field-wise helpers.
+struct performance_counters {
+  double cycles;
+  double branches;
+  double missed_branches;
+  double instructions;
+  performance_counters(uint64_t c, uint64_t b, uint64_t m, uint64_t i)
+      : cycles(c), branches(b), missed_branches(m), instructions(i) {}
+  performance_counters(double c, double b, double m, double i)
+      : cycles(c), branches(b), missed_branches(m), instructions(i) {}
+  /// Initializes every counter to the same value.
+  performance_counters(double init)
+      : cycles(init), branches(init), missed_branches(init), instructions(init) {}
+
+  /// Subtracts rhs field by field.
+  inline performance_counters &operator-=(const performance_counters &rhs) {
+    cycles -= rhs.cycles;
+    branches -= rhs.branches;
+    missed_branches -= rhs.missed_branches;
+    instructions -= rhs.instructions;
+    return *this;
+  }
+  /// Keeps, for each field, the smaller of the current value and rhs's.
+  inline performance_counters &min(const performance_counters &rhs) {
+    if (rhs.cycles < cycles) cycles = rhs.cycles;
+    if (rhs.branches < branches) branches = rhs.branches;
+    if (rhs.missed_branches < missed_branches) missed_branches = rhs.missed_branches;
+    if (rhs.instructions < instructions) instructions = rhs.instructions;
+    return *this;
+  }
+  /// Adds rhs field by field.
+  inline performance_counters &operator+=(const performance_counters &rhs) {
+    cycles += rhs.cycles;
+    branches += rhs.branches;
+    missed_branches += rhs.missed_branches;
+    instructions += rhs.instructions;
+    return *this;
+  }
+
+  /// Divides every field by divisor.
+  inline performance_counters &operator/=(double divisor) {
+    cycles /= divisor;
+    branches /= divisor;
+    missed_branches /= divisor;
+    instructions /= divisor;
+    return *this;
+  }
+};
+
+/// Field-wise difference a - b, returned as a new value.
+inline performance_counters operator-(const performance_counters &a,
+                                      const performance_counters &b) {
+  performance_counters delta = a;
+  return delta -= b;
+}
+
+using f32 = float;  // short aliases used by the kperf/kpep declarations below
+using f64 = double;
+using i8 = int8_t;
+using u8 = uint8_t;
+using i16 = int16_t;
+using u16 = uint16_t;
+using i32 = int32_t;
+using u32 = uint32_t;
+using i64 = int64_t;
+using u64 = uint64_t;
+using usize = size_t;
+
+// -----------------------------------------------------------------------------
+// <kperf.framework> header (reverse engineered)
+// This framework wraps some sysctl calls to communicate with the kpc in kernel.
+// Most functions require root privileges, or the process must be "blessed".
+// -----------------------------------------------------------------------------
+
+// Cross-platform class constants.
+#define KPC_CLASS_FIXED (0)
+#define KPC_CLASS_CONFIGURABLE (1)
+#define KPC_CLASS_POWER (2)
+#define KPC_CLASS_RAWPMU (3)
+
+// Cross-platform class mask constants.
+#define KPC_CLASS_FIXED_MASK (1u << KPC_CLASS_FIXED)                // 1
+#define KPC_CLASS_CONFIGURABLE_MASK (1u << KPC_CLASS_CONFIGURABLE)  // 2
+#define KPC_CLASS_POWER_MASK (1u << KPC_CLASS_POWER)                // 4
+#define KPC_CLASS_RAWPMU_MASK (1u << KPC_CLASS_RAWPMU)              // 8
+
+// PMU version constants.
+#define KPC_PMU_ERROR (0)      // Error
+#define KPC_PMU_INTEL_V3 (1)   // Intel
+#define KPC_PMU_ARM_APPLE (2)  // ARM64
+#define KPC_PMU_INTEL_V2 (3)   // Old Intel
+#define KPC_PMU_ARM_V2 (4)     // Old ARM
+
+// The maximum number of counters we could read from every class in one go.
+// ARMV7: FIXED: 1, CONFIGURABLE: 4
+// ARM32: FIXED: 2, CONFIGURABLE: 6
+// ARM64: FIXED: 2, CONFIGURABLE: CORE_NCTRS - FIXED (6 or 8)
+// x86: 32
+#define KPC_MAX_COUNTERS 32
+
+// Bits for defining what to do on an action.
+// Defined in https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/action.h
+#define KPERF_SAMPLER_TH_INFO (1U << 0)
+#define KPERF_SAMPLER_TH_SNAPSHOT (1U << 1)
+#define KPERF_SAMPLER_KSTACK (1U << 2)
+#define KPERF_SAMPLER_USTACK (1U << 3)
+#define KPERF_SAMPLER_PMC_THREAD (1U << 4)
+#define KPERF_SAMPLER_PMC_CPU (1U << 5)
+#define KPERF_SAMPLER_PMC_CONFIG (1U << 6)
+#define KPERF_SAMPLER_MEMINFO (1U << 7)
+#define KPERF_SAMPLER_TH_SCHEDULING (1U << 8)
+#define KPERF_SAMPLER_TH_DISPATCH (1U << 9)
+#define KPERF_SAMPLER_TK_SNAPSHOT (1U << 10)
+#define KPERF_SAMPLER_SYS_MEM (1U << 11)
+#define KPERF_SAMPLER_TH_INSCYC (1U << 12)
+#define KPERF_SAMPLER_TK_INFO (1U << 13)
+
+// Maximum number of kperf action ids.
+#define KPERF_ACTION_MAX (32)
+
+// Maximum number of kperf timer ids.
+#define KPERF_TIMER_MAX (8)
+
+// x86/arm config registers are 64-bit
+typedef u64 kpc_config_t;
+
+/// Print current CPU identification string to the buffer (same as snprintf),
+/// such as "cpu_7_8_10b282dc_46". This string can be used to locate the PMC
+/// database in /usr/share/kpep.
+/// @return string's length, or negative value if error occurs.
+/// @note This method does not require root privileges.
+/// @details sysctl get(hw.cputype), get(hw.cpusubtype),
+///                 get(hw.cpufamily), get(machdep.cpu.model)
+static int (*kpc_cpu_string)(char *buf, usize buf_size);
+
+/// Get the version of KPC that's being run.
+/// @return See `PMU version constants` above.
+/// @details sysctl get(kpc.pmu_version)
+static u32 (*kpc_pmu_version)(void);
+
+/// Get running PMC classes.
+/// @return See `class mask constants` above,
+///         0 if error occurs or no class is set.
+/// @details sysctl get(kpc.counting)
+static u32 (*kpc_get_counting)(void);
+
+/// Set PMC classes to enable counting.
+/// @param classes See `class mask constants` above, set 0 to shutdown counting.
+/// @return 0 for success.
+/// @details sysctl set(kpc.counting)
+static int (*kpc_set_counting)(u32 classes);
+
+/// Get running PMC classes for current thread.
+/// @return See `class mask constants` above,
+///         0 if error occurs or no class is set.
+/// @details sysctl get(kpc.thread_counting)
+static u32 (*kpc_get_thread_counting)(void);
+
+/// Set PMC classes to enable counting for current thread.
+/// @param classes See `class mask constants` above, set 0 to shutdown counting.
+/// @return 0 for success.
+/// @details sysctl set(kpc.thread_counting)
+static int (*kpc_set_thread_counting)(u32 classes);
+
+/// Get how many config registers there are for a given mask.
+/// For example: Intel may return 1 for `KPC_CLASS_FIXED_MASK`,
+///                        and 4 for `KPC_CLASS_CONFIGURABLE_MASK`.
+/// @param classes See `class mask constants` above.
+/// @return 0 if error occurs or no class is set.
+/// @note This method does not require root privileges.
+/// @details sysctl get(kpc.config_count)
+static u32 (*kpc_get_config_count)(u32 classes);
+
+/// Get config registers.
+/// @param classes see `class mask constants` above.
+/// @param config Config buffer to receive values, should not be smaller than
+///               kpc_get_config_count(classes) * sizeof(kpc_config_t).
+/// @return 0 for success.
+/// @details sysctl get(kpc.config_count), get(kpc.config)
+static int (*kpc_get_config)(u32 classes, kpc_config_t *config);
+
+/// Set config registers.
+/// @param classes see `class mask constants` above.
+/// @param config Config buffer, should not be smaller than
+///               kpc_get_config_count(classes) * sizeof(kpc_config_t).
+/// @return 0 for success.
+/// @details sysctl get(kpc.config_count), set(kpc.config)
+static int (*kpc_set_config)(u32 classes, kpc_config_t *config);
+
+/// Get how many counters there are for a given mask.
+/// For example: Intel may return 3 for `KPC_CLASS_FIXED_MASK`,
+///                        and 4 for `KPC_CLASS_CONFIGURABLE_MASK`.
+/// @param classes See `class mask constants` above.
+/// @note This method does not require root privileges.
+/// @details sysctl get(kpc.counter_count)
+static u32 (*kpc_get_counter_count)(u32 classes);
+
+/// Get counter accumulations.
+/// If `all_cpus` is true, the buffer count should not be smaller than
+/// (cpu_count * counter_count). Otherwise, the buffer count should not be
+/// smaller than (counter_count).
+/// @see kpc_get_counter_count(), kpc_cpu_count().
+/// @param all_cpus true for all CPUs, false for current cpu.
+/// @param classes See `class mask constants` above.
+/// @param curcpu A pointer to receive current cpu id, can be NULL.
+/// @param buf Buffer to receive counter's value.
+/// @return 0 for success.
+/// @details sysctl get(hw.ncpu), get(kpc.counter_count), get(kpc.counters)
+static int (*kpc_get_cpu_counters)(bool all_cpus, u32 classes, int *curcpu,
+                                   u64 *buf);
+
+/// Get counter accumulations for current thread.
+/// @param tid Thread id, should be 0.
+/// @param buf_count The number of buf's elements (not bytes),
+///                  should not be smaller than kpc_get_counter_count().
+/// @param buf Buffer to receive counter's value.
+/// @return 0 for success.
+/// @details sysctl get(kpc.thread_counters)
+static int (*kpc_get_thread_counters)(u32 tid, u32 buf_count, u64 *buf);
+
+/// Acquire/release the counters used by the Power Manager.
+/// @param val 1:acquire, 0:release
+/// @return 0 for success.
+/// @details sysctl set(kpc.force_all_ctrs)
+static int (*kpc_force_all_ctrs_set)(int val);
+
+/// Get the state of all_ctrs.
+/// @return 0 for success.
+/// @details sysctl get(kpc.force_all_ctrs)
+static int (*kpc_force_all_ctrs_get)(int *val_out);
+
+/// Set number of actions, should be `KPERF_ACTION_MAX`.
+/// @details sysctl set(kperf.action.count)
+static int (*kperf_action_count_set)(u32 count);
+
+/// Get number of actions.
+/// @details sysctl get(kperf.action.count)
+static int (*kperf_action_count_get)(u32 *count);
+
+/// Set what to sample when a trigger fires an action, e.g.
+/// `KPERF_SAMPLER_PMC_CPU`.
+/// @details sysctl set(kperf.action.samplers)
+static int (*kperf_action_samplers_set)(u32 actionid, u32 sample);
+
+/// Get what to sample when a trigger fires an action.
+/// @details sysctl get(kperf.action.samplers)
+static int (*kperf_action_samplers_get)(u32 actionid, u32 *sample);
+
+/// Apply a task filter to the action, -1 to disable filter.
+/// @details sysctl set(kperf.action.filter_by_task)
+static int (*kperf_action_filter_set_by_task)(u32 actionid, i32 port);
+
+/// Apply a pid filter to the action, -1 to disable filter.
+/// @details sysctl set(kperf.action.filter_by_pid)
+static int (*kperf_action_filter_set_by_pid)(u32 actionid, i32 pid);
+
+/// Set number of time triggers, should be `KPERF_TIMER_MAX`.
+/// @details sysctl set(kperf.timer.count)
+static int (*kperf_timer_count_set)(u32 count);
+
+/// Get number of time triggers.
+/// @details sysctl get(kperf.timer.count)
+static int (*kperf_timer_count_get)(u32 *count);
+
+/// Set timer number and period.
+/// @details sysctl set(kperf.timer.period)
+static int (*kperf_timer_period_set)(u32 actionid, u64 tick);
+
+/// Get timer number and period.
+/// @details sysctl get(kperf.timer.period)
+static int (*kperf_timer_period_get)(u32 actionid, u64 *tick);
+
+/// Set timer number and actionid.
+/// @details sysctl set(kperf.timer.action)
+static int (*kperf_timer_action_set)(u32 actionid, u32 timerid);
+
+/// Get timer number and actionid.
+/// @details sysctl get(kperf.timer.action)
+static int (*kperf_timer_action_get)(u32 actionid, u32 *timerid);
+
+/// Set which timer ID does PET (Profile Every Thread).
+/// @details sysctl set(kperf.timer.pet_timer)
+static int (*kperf_timer_pet_set)(u32 timerid);
+
+/// Get which timer ID does PET (Profile Every Thread).
+/// @details sysctl get(kperf.timer.pet_timer)
+static int (*kperf_timer_pet_get)(u32 *timerid);
+
+/// Enable or disable sampling.
+/// @details sysctl set(kperf.sampling)
+static int (*kperf_sample_set)(u32 enabled);
+
+/// Get whether sampling is currently enabled.
+/// @details sysctl get(kperf.sampling)
+static int (*kperf_sample_get)(u32 *enabled);
+
+/// Reset kperf: stop sampling, kdebug, timers and actions.
+/// @return 0 for success.
+static int (*kperf_reset)(void);
+
+/// Nanoseconds to CPU ticks.
+static u64 (*kperf_ns_to_ticks)(u64 ns);
+
+/// CPU ticks to nanoseconds.
+static u64 (*kperf_ticks_to_ns)(u64 ticks);
+
+/// CPU ticks frequency (mach_absolute_time).
+static u64 (*kperf_tick_frequency)(void);
+
+// -----------------------------------------------------------------------------
+// <kperfdata.framework> header (reverse engineered)
+// This framework provides some functions to access the local CPU database.
+// These functions do not require root privileges.
+// -----------------------------------------------------------------------------
+
+// KPEP CPU architecture constants.
+#define KPEP_ARCH_I386 0
+#define KPEP_ARCH_X86_64 1
+#define KPEP_ARCH_ARM 2
+#define KPEP_ARCH_ARM64 3
+
+/// KPEP event (size: 48/28 bytes on 64/32 bit OS)
+typedef struct kpep_event {
+  const char *name;  ///< Unique name of an event, such as "INST_RETIRED.ANY".
+  const char *description;  ///< Description for this event.
+  const char *errata;       ///< Errata, currently NULL.
+  const char *alias;        ///< Alias name, such as "Instructions", "Cycles".
+  const char *fallback;     ///< Fallback event name for fixed counter.
+  u32 mask;
+  u8 number;
+  u8 umask;
+  u8 reserved;
+  u8 is_fixed;
+} kpep_event;
+
+/// KPEP database (size: 144/80 bytes on 64/32 bit OS)
+typedef struct kpep_db {
+  const char *name;            ///< Database name, such as "haswell".
+  const char *cpu_id;          ///< Plist name, such as "cpu_7_8_10b282dc".
+  const char *marketing_name;  ///< Marketing name, such as "Intel Haswell".
+  void *plist_data;            ///< Plist data (CFDataRef), currently NULL.
+  void *event_map;  ///< All events (CFDict<CFSTR(event_name), kpep_event *>).
+  kpep_event
+      *event_arr;  ///< Event struct buffer (sizeof(kpep_event) * events_count).
+  kpep_event **fixed_event_arr;  ///< Fixed counter events (sizeof(kpep_event *)
+                                 ///< * fixed_counter_count)
+  void *alias_map;  ///< All aliases (CFDict<CFSTR(event_name), kpep_event *>).
+  usize reserved_1;
+  usize reserved_2;
+  usize reserved_3;
+  usize event_count;  ///< All events count.
+  usize alias_count;
+  usize fixed_counter_count;
+  usize config_counter_count;
+  usize power_counter_count;
+  u32 archtecture;  ///< One of the KPEP_ARCH_* constants above.
+  u32 fixed_counter_bits;
+  u32 config_counter_bits;
+  u32 power_counter_bits;
+} kpep_db;
+
+/// KPEP config (size: 80/44 bytes on 64/32 bit OS)
+typedef struct kpep_config {
+  kpep_db *db;
+  kpep_event **ev_arr;  ///< (sizeof(kpep_event *) * counter_count), init NULL
+  usize *ev_map;        ///< (sizeof(usize *) * counter_count), init 0
+  usize *ev_idx;        ///< (sizeof(usize *) * counter_count), init -1
+  u32 *flags;           ///< (sizeof(u32 *) * counter_count), init 0
+  u64 *kpc_periods;     ///< (sizeof(u64 *) * counter_count), init 0
+  usize event_count;    ///< kpep_config_events_count()
+  usize counter_count;
+  u32 classes;  ///< See `class mask constants` above.
+  u32 config_counter;
+  u32 power_counter;
+  u32 reserved;
+} kpep_config;
+
+/// Error code for kpep_config_xxx() and kpep_db_xxx() functions.
+/// The numeric values double as indices into kpep_config_error_names, so the
+/// two lists must stay in the same order. KPEP_CONFIG_ERROR_MAX is not an
+/// error itself; it is the number of defined codes.
+typedef enum {
+  KPEP_CONFIG_ERROR_NONE = 0,
+  KPEP_CONFIG_ERROR_INVALID_ARGUMENT = 1,
+  KPEP_CONFIG_ERROR_OUT_OF_MEMORY = 2,
+  KPEP_CONFIG_ERROR_IO = 3,
+  KPEP_CONFIG_ERROR_BUFFER_TOO_SMALL = 4,
+  KPEP_CONFIG_ERROR_CUR_SYSTEM_UNKNOWN = 5,
+  KPEP_CONFIG_ERROR_DB_PATH_INVALID = 6,
+  KPEP_CONFIG_ERROR_DB_NOT_FOUND = 7,
+  KPEP_CONFIG_ERROR_DB_ARCH_UNSUPPORTED = 8,
+  KPEP_CONFIG_ERROR_DB_VERSION_UNSUPPORTED = 9,
+  KPEP_CONFIG_ERROR_DB_CORRUPT = 10,
+  KPEP_CONFIG_ERROR_EVENT_NOT_FOUND = 11,
+  KPEP_CONFIG_ERROR_CONFLICTING_EVENTS = 12,
+  KPEP_CONFIG_ERROR_COUNTERS_NOT_FORCED = 13,
+  KPEP_CONFIG_ERROR_EVENT_UNAVAILABLE = 14,
+  KPEP_CONFIG_ERROR_ERRNO = 15,
+  KPEP_CONFIG_ERROR_MAX
+} kpep_config_error_code;
+
+/// Error description for kpep_config_error_code.
+/// Entry i is the text for the error code with numeric value i; the array is
+/// sized by KPEP_CONFIG_ERROR_MAX and read by kpep_config_error_desc().
+static const char *kpep_config_error_names[KPEP_CONFIG_ERROR_MAX] = {
+    "none",                               // 0
+    "invalid argument",                   // 1
+    "out of memory",                      // 2
+    "I/O",                                // 3
+    "buffer too small",                   // 4
+    "current system unknown",             // 5
+    "database path invalid",              // 6
+    "database not found",                 // 7
+    "database architecture unsupported",  // 8
+    "database version unsupported",       // 9
+    "database corrupt",                   // 10
+    "event not found",                    // 11
+    "conflicting events",                 // 12
+    "all counters must be forced",        // 13
+    "event unavailable",                  // 14
+    "check errno"};                       // 15
+
+/// Error description.
+static const char *kpep_config_error_desc(int code) {
+  if (0 <= code && code < KPEP_CONFIG_ERROR_MAX) {
+    return kpep_config_error_names[code];
+  }
+  return "unknown error";
+}
+
+// The kpep_config_xxx entries below are function pointers, not functions.
+// They are NULL until lib_init() resolves them from the kperfdata library.
+
+/// Create a config.
+/// @param db A kpep db, see kpep_db_create()
+/// @param cfg_ptr A pointer to receive the new config.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_create)(kpep_db *db, kpep_config **cfg_ptr);
+
+/// Free the config.
+/// @param cfg A config obtained from kpep_config_create().
+static void (*kpep_config_free)(kpep_config *cfg);
+
+/// Add an event to config.
+/// @param cfg The config.
+/// @param ev_ptr An event pointer.
+/// @param flag 0: all, 1: user space only
+/// @param err Error bitmap pointer, can be NULL.
+///            If return value is `CONFLICTING_EVENTS`, this bitmap contains
+///            the conflicted event indices, e.g. "1 << 2" means index 2.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_add_event)(kpep_config *cfg, kpep_event **ev_ptr,
+                                    u32 flag, u32 *err);
+
+/// Remove event at index.
+/// @param cfg The config.
+/// @param idx Index of the event to remove.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_remove_event)(kpep_config *cfg, usize idx);
+
+/// Force all counters.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_force_counters)(kpep_config *cfg);
+
+/// Get events count.
+/// @param count_ptr Receives the count.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_events_count)(kpep_config *cfg, usize *count_ptr);
+
+/// Get all event pointers.
+/// @param buf A buffer to receive event pointers.
+/// @param buf_size The buffer's size in bytes, should not be smaller than
+///                 kpep_config_events_count() * sizeof(void *).
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_events)(kpep_config *cfg, kpep_event **buf,
+                                 usize buf_size);
+
+/// Get kpc register configs.
+/// @param buf A buffer to receive kpc register configs.
+/// @param buf_size The buffer's size in bytes, should not be smaller than
+///                 kpep_config_kpc_count() * sizeof(kpc_config_t).
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_kpc)(kpep_config *cfg, kpc_config_t *buf,
+                              usize buf_size);
+
+/// Get kpc register config count.
+/// @param count_ptr Receives the count.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_kpc_count)(kpep_config *cfg, usize *count_ptr);
+
+/// Get kpc classes.
+/// @param classes_ptr Receives the class mask, see `class mask constants`
+///                    above.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_kpc_classes)(kpep_config *cfg, u32 *classes_ptr);
+
+/// Get the index mapping from event to counter.
+/// @param buf A buffer to receive indexes.
+/// @param buf_size The buffer's size in bytes, should not be smaller than
+///                 kpep_config_events_count() * sizeof(kpc_config_t).
+///                 NOTE(review): buf holds usize entries, so sizeof(usize) is
+///                 presumably what is meant here — confirm.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_kpc_map)(kpep_config *cfg, usize *buf, usize buf_size);
+
+// The kpep_db_xxx and kpep_event_xxx entries below are function pointers, not
+// functions. They are NULL until lib_init() resolves them from the kperfdata
+// library.
+
+/// Open a kpep database file in "/usr/share/kpep/" or "/usr/local/share/kpep/".
+/// @param name File name, for example "haswell", "cpu_100000c_1_92fb37c8".
+///             Pass NULL for current CPU.
+/// @param db_ptr A pointer to receive the opened database.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_db_create)(const char *name, kpep_db **db_ptr);
+
+/// Free the kpep database.
+/// @param db A database obtained from kpep_db_create().
+static void (*kpep_db_free)(kpep_db *db);
+
+/// Get the database's name.
+/// @param name Receives the name string.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_db_name)(kpep_db *db, const char **name);
+
+/// Get the event alias count.
+/// @param count Receives the count.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_db_aliases_count)(kpep_db *db, usize *count);
+
+/// Get all aliases.
+/// @param buf A buffer to receive all alias strings.
+/// @param buf_size The buffer's size in bytes,
+///        should not be smaller than kpep_db_aliases_count() * sizeof(void *).
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_db_aliases)(kpep_db *db, const char **buf, usize buf_size);
+
+/// Get counters count for given classes.
+/// @param classes 1: Fixed, 2: Configurable.
+/// @param count Receives the count.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_db_counters_count)(kpep_db *db, u8 classes, usize *count);
+
+/// Get the count of all events.
+/// @param count Receives the count.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_db_events_count)(kpep_db *db, usize *count);
+
+/// Get all events.
+/// @param buf A buffer to receive all event pointers.
+/// @param buf_size The buffer's size in bytes,
+///        should not be smaller than kpep_db_events_count() * sizeof(void *).
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_db_events)(kpep_db *db, kpep_event **buf, usize buf_size);
+
+/// Get one event by name.
+/// @param name Event name to look up.
+/// @param ev_ptr Receives the event pointer.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_db_event)(kpep_db *db, const char *name, kpep_event **ev_ptr);
+
+/// Get event's name.
+/// @param name_ptr Receives the name string.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_event_name)(kpep_event *ev, const char **name_ptr);
+
+/// Get event's alias.
+/// @param alias_ptr Receives the alias string.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_event_alias)(kpep_event *ev, const char **alias_ptr);
+
+/// Get event's description.
+/// @param str_ptr Receives the description string.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_event_description)(kpep_event *ev, const char **str_ptr);
+
+// -----------------------------------------------------------------------------
+// load kperf/kperfdata dynamic library
+// -----------------------------------------------------------------------------
+
+/// One entry of a symbol table used by lib_init() / lib_deinit().
+typedef struct {
+  const char *name;  ///< Symbol name passed to dlsym().
+  void **impl;       ///< Address of the function pointer that receives it.
+} lib_symbol;
+
+/// Element count of an array object (the argument must be an array, not a
+/// pointer).
+#define lib_nelems(x) (sizeof(x) / sizeof(*(x)))
+/// Initializer for a lib_symbol: the identifier spelled as a string, followed
+/// by the address of the function pointer variable with that same name.
+#define lib_symbol_def(name) {#name, (void **)&name}
+
+/// Every function pointer that lib_init() resolves from the kperf library.
+/// Each entry pairs the symbol name with the same-named pointer variable;
+/// lib_init() fails if any one of them cannot be found.
+static const lib_symbol lib_symbols_kperf[] = {
+    lib_symbol_def(kpc_pmu_version),
+    lib_symbol_def(kpc_cpu_string),
+    lib_symbol_def(kpc_set_counting),
+    lib_symbol_def(kpc_get_counting),
+    lib_symbol_def(kpc_set_thread_counting),
+    lib_symbol_def(kpc_get_thread_counting),
+    lib_symbol_def(kpc_get_config_count),
+    lib_symbol_def(kpc_get_counter_count),
+    lib_symbol_def(kpc_set_config),
+    lib_symbol_def(kpc_get_config),
+    lib_symbol_def(kpc_get_cpu_counters),
+    lib_symbol_def(kpc_get_thread_counters),
+    lib_symbol_def(kpc_force_all_ctrs_set),
+    lib_symbol_def(kpc_force_all_ctrs_get),
+    lib_symbol_def(kperf_action_count_set),
+    lib_symbol_def(kperf_action_count_get),
+    lib_symbol_def(kperf_action_samplers_set),
+    lib_symbol_def(kperf_action_samplers_get),
+    lib_symbol_def(kperf_action_filter_set_by_task),
+    lib_symbol_def(kperf_action_filter_set_by_pid),
+    lib_symbol_def(kperf_timer_count_set),
+    lib_symbol_def(kperf_timer_count_get),
+    lib_symbol_def(kperf_timer_period_set),
+    lib_symbol_def(kperf_timer_period_get),
+    lib_symbol_def(kperf_timer_action_set),
+    lib_symbol_def(kperf_timer_action_get),
+    lib_symbol_def(kperf_sample_set),
+    lib_symbol_def(kperf_sample_get),
+    lib_symbol_def(kperf_reset),
+    lib_symbol_def(kperf_timer_pet_set),
+    lib_symbol_def(kperf_timer_pet_get),
+    lib_symbol_def(kperf_ns_to_ticks),
+    lib_symbol_def(kperf_ticks_to_ns),
+    lib_symbol_def(kperf_tick_frequency),
+};
+
+/// Every function pointer that lib_init() resolves from the kperfdata library.
+/// Each entry pairs the symbol name with the same-named pointer variable;
+/// lib_init() fails if any one of them cannot be found.
+static const lib_symbol lib_symbols_kperfdata[] = {
+    lib_symbol_def(kpep_config_create),
+    lib_symbol_def(kpep_config_free),
+    lib_symbol_def(kpep_config_add_event),
+    lib_symbol_def(kpep_config_remove_event),
+    lib_symbol_def(kpep_config_force_counters),
+    lib_symbol_def(kpep_config_events_count),
+    lib_symbol_def(kpep_config_events),
+    lib_symbol_def(kpep_config_kpc),
+    lib_symbol_def(kpep_config_kpc_count),
+    lib_symbol_def(kpep_config_kpc_classes),
+    lib_symbol_def(kpep_config_kpc_map),
+    lib_symbol_def(kpep_db_create),
+    lib_symbol_def(kpep_db_free),
+    lib_symbol_def(kpep_db_name),
+    lib_symbol_def(kpep_db_aliases_count),
+    lib_symbol_def(kpep_db_aliases),
+    lib_symbol_def(kpep_db_counters_count),
+    lib_symbol_def(kpep_db_events_count),
+    lib_symbol_def(kpep_db_events),
+    lib_symbol_def(kpep_db_event),
+    lib_symbol_def(kpep_event_name),
+    lib_symbol_def(kpep_event_alias),
+    lib_symbol_def(kpep_event_description),
+};
+
+// Paths handed to dlopen() by lib_init().
+#define lib_path_kperf "/System/Library/PrivateFrameworks/kperf.framework/kperf"
+#define lib_path_kperfdata \
+  "/System/Library/PrivateFrameworks/kperfdata.framework/kperfdata"
+
+// Loader state. lib_inited records that lib_init() has already run (whether
+// it succeeded or not); lib_has_err records that the run failed, in which case
+// lib_err_msg holds the formatted reason.
+static bool lib_inited = false;
+static bool lib_has_err = false;
+static char lib_err_msg[256];
+
+// dlopen() handles; NULL while the corresponding library is not loaded.
+static void *lib_handle_kperf = NULL;
+static void *lib_handle_kperfdata = NULL;
+
+/// Undo lib_init(): close whichever library handles are open, null out every
+/// function pointer listed in the two symbol tables, and reset the loader
+/// flags so that lib_init() may be attempted again.
+static void lib_deinit(void) {
+  if (lib_handle_kperf != NULL) {
+    dlclose(lib_handle_kperf);
+    lib_handle_kperf = NULL;
+  }
+  if (lib_handle_kperfdata != NULL) {
+    dlclose(lib_handle_kperfdata);
+    lib_handle_kperfdata = NULL;
+  }
+
+  // The resolved addresses are meaningless once the libraries are closed.
+  for (const lib_symbol &entry : lib_symbols_kperf) {
+    *entry.impl = NULL;
+  }
+  for (const lib_symbol &entry : lib_symbols_kperfdata) {
+    *entry.impl = NULL;
+  }
+
+  lib_has_err = false;
+  lib_inited = false;
+}
+
+/// Load the kperf and kperfdata libraries and resolve every function pointer
+/// listed in lib_symbols_kperf / lib_symbols_kperfdata.
+///
+/// The outcome is cached: once this function has run, later calls return the
+/// stored result without touching the libraries again. On failure everything
+/// loaded so far is released through lib_deinit() and lib_err_msg describes
+/// what went wrong.
+/// @return true when both libraries and all symbols are available.
+static bool lib_init(void) {
+  if (lib_inited) {
+    return !lib_has_err;
+  }
+
+  // Shared failure path: roll back, then latch the "tried and failed" state
+  // (lib_deinit() clears both flags, so they are set afterwards).
+  const auto fail = []() -> bool {
+    lib_deinit();
+    lib_inited = true;
+    lib_has_err = true;
+    return false;
+  };
+
+  // Open both libraries.
+  lib_handle_kperf = dlopen(lib_path_kperf, RTLD_LAZY);
+  if (lib_handle_kperf == NULL) {
+    snprintf(lib_err_msg, sizeof(lib_err_msg),
+             "Failed to load kperf.framework, message: %s.", dlerror());
+    return fail();
+  }
+  lib_handle_kperfdata = dlopen(lib_path_kperfdata, RTLD_LAZY);
+  if (lib_handle_kperfdata == NULL) {
+    snprintf(lib_err_msg, sizeof(lib_err_msg),
+             "Failed to load kperfdata.framework, message: %s.", dlerror());
+    return fail();
+  }
+
+  // Resolve the kperf entry points; a single missing symbol is fatal.
+  for (const lib_symbol &entry : lib_symbols_kperf) {
+    void *const address = dlsym(lib_handle_kperf, entry.name);
+    *entry.impl = address;
+    if (address == NULL) {
+      snprintf(lib_err_msg, sizeof(lib_err_msg),
+               "Failed to load kperf function: %s.", entry.name);
+      return fail();
+    }
+  }
+
+  // Same for the kperfdata entry points.
+  for (const lib_symbol &entry : lib_symbols_kperfdata) {
+    void *const address = dlsym(lib_handle_kperfdata, entry.name);
+    *entry.impl = address;
+    if (address == NULL) {
+      snprintf(lib_err_msg, sizeof(lib_err_msg),
+               "Failed to load kperfdata function: %s.", entry.name);
+      return fail();
+    }
+  }
+
+  lib_has_err = false;
+  lib_inited = true;
+  return true;
+}
+
+// -----------------------------------------------------------------------------
+// kdebug private structs
+// https://github.com/apple/darwin-xnu/blob/main/bsd/sys_private/kdebug_private.h
+// -----------------------------------------------------------------------------
+
+/*
+ * Ensure that both LP32 and LP64 variants of arm64 use the same kd_buf
+ * structure.
+ *
+ * kd_buf_argtype is the type of the argument fields of kd_buf: always 64 bits
+ * wide on arm64, pointer-sized on every other target.
+ */
+#if defined(__arm64__)
+typedef uint64_t kd_buf_argtype;
+#else
+typedef uintptr_t kd_buf_argtype;
+#endif
+
+/* A single kdebug trace record.
+ * NOTE(review): presumably mirrors kd_buf from the kdebug_private.h header
+ * referenced in the section comment above — keep the layout in sync with it. */
+typedef struct {
+  uint64_t timestamp;
+  kd_buf_argtype arg1;
+  kd_buf_argtype arg2;
+  kd_buf_argtype arg3;
+  kd_buf_argtype arg4;
+  kd_buf_argtype arg5; /* the thread ID */
+  uint32_t debugid;    /* see <sys/kdebug.h> */
+
+/*
+ * Ensure that both LP32 and LP64 variants of arm64 use the same kd_buf
+ * structure.
+ * The two trailing fields exist only on LP64 targets and on arm64.
+ */
+#if defined(__LP64__) || defined(__arm64__)
+  uint32_t cpuid; /* cpu index, from 0 */
+  kd_buf_argtype unused;
+#endif
+} kd_buf;
+
+/* bits for the type field of kd_regtype */
+#define KDBG_CLASSTYPE 0x10000
+#define KDBG_SUBCLSTYPE 0x20000
+#define KDBG_RANGETYPE 0x40000
+#define KDBG_TYPENONE 0x80000
+/* bitwise OR of the four type bits above */
+#define KDBG_CKTYPES 0xF0000
+
+/* only trace at most 4 types of events, at the code granularity */
+#define KDBG_VALCHECK 0x00200000U
+
+/* Trace filter descriptor: `type` takes the KDBG_*TYPE bits defined above.
+ * NOTE(review): the meaning of value1..value4 presumably depends on `type`;
+ * see the kdebug_private.h header referenced in the section comment. */
+typedef struct {
+  unsigned int type;
+  unsigned int value1;
+  unsigned int value2;
+  unsigned int value3;
+  unsigned int value4;
+} kd_regtype;
+
+/* Status of the kdebug trace buffers.
+ * NOTE(review): presumably mirrors kbufinfo_t from the kdebug_private.h
+ * header referenced in the section comment — keep the layout in sync. */
+typedef struct {
+  /* number of events that can fit in the buffers */
+  int nkdbufs;
+  /* set if trace is disabled */
+  int nolog;
+  /* kd_ctrl_page.flags */
+  unsigned int flags;
+  /* number of threads in thread map */
+  int nkdthreads;
+  /* the owning pid */
+  int bufid;
+} kbufinfo_t;
+
+
+// -----------------------------------------------------------------------------
+// Demo
+// -----------------------------------------------------------------------------
+
+/// Maximum number of candidate database names a single alias can list.
+#define EVENT_NAME_MAX 8
+/// A printable metric name together with the database event names that may
+/// provide it. Unused trailing slots of `names` are NULL and end the list
+/// (see get_event()).
+typedef struct {
+  const char *alias;                  ///< name for print
+  const char *names[EVENT_NAME_MAX];  ///< name from pmc db
+} event_alias;
+
+/// Event names from /usr/share/kpep/<name>.plist
+/// For each metric the candidate names are tried in the listed order and the
+/// first one the database knows is used (see get_event()). The order of the
+/// metrics themselves matters too: AppleEvents adds them to the config in this
+/// order and later indexes counter_map by position.
+static const event_alias profile_events[] = {
+    {"cycles",
+     {
+         "FIXED_CYCLES",             // Apple A7-A15
+         "CPU_CLK_UNHALTED.THREAD",  // Intel Core 1st-10th
+         "CPU_CLK_UNHALTED.CORE",    // Intel Yonah, Merom
+     }},
+    {"instructions",
+     {
+         "FIXED_INSTRUCTIONS",  // Apple A7-A15
+         "INST_RETIRED.ANY"     // Intel Yonah, Merom, Core 1st-10th
+     }},
+    {"branches",
+     {
+         "INST_BRANCH",                   // Apple A7-A15
+         "BR_INST_RETIRED.ALL_BRANCHES",  // Intel Core 1st-10th
+         "INST_RETIRED.ANY",              // Intel Yonah, Merom
+     }},
+    {"branch-misses",
+     {
+         "BRANCH_MISPRED_NONSPEC",  // Apple A7-A15, since iOS 15, macOS 12
+         "BRANCH_MISPREDICT",       // Apple A7-A14
+         "BR_MISP_RETIRED.ALL_BRANCHES",  // Intel Core 2nd-10th
+         "BR_INST_RETIRED.MISPRED",       // Intel Yonah, Merom
+     }},
+};
+
+/// Resolve an alias to a database event.
+/// Walks the alias' candidate names in order, stopping at the first NULL slot,
+/// and hands back the event of the first name for which kpep_db_event()
+/// reports success.
+/// @param db    Database to search.
+/// @param alias Alias whose candidate names are tried.
+/// @return The event stored by the first successful lookup, or NULL when no
+///         candidate name succeeds.
+static kpep_event *get_event(kpep_db *db, const event_alias *alias) {
+  for (usize slot = 0; slot < EVENT_NAME_MAX && alias->names[slot]; slot++) {
+    kpep_event *found = NULL;
+    const int status = kpep_db_event(db, alias->names[slot], &found);
+    if (status != 0) {
+      continue;  // this name is not available, try the next candidate
+    }
+    return found;
+  }
+  return NULL;
+}
+
+/// Thread-level hardware counters on Apple platforms, driven through the
+/// function pointers that lib_init() resolves from kperf / kperfdata.
+///
+/// Usage: call setup_performance_counters() and, if it returns true, sample
+/// with get_counters().
+struct AppleEvents {
+  kpc_config_t regs[KPC_MAX_COUNTERS] = {0};  ///< Register configs sent to kpc.
+  usize counter_map[KPC_MAX_COUNTERS] = {0};  ///< Event index -> counter index.
+  u64 counters_0[KPC_MAX_COUNTERS] = {0};     ///< Latest raw counter sample.
+  u64 counters_1[KPC_MAX_COUNTERS] = {0};     ///< Not used inside this struct.
+  /// Number of metrics listed in profile_events.
+  static constexpr usize ev_count =
+      sizeof(profile_events) / sizeof(profile_events[0]);
+
+  /// Configure the counters for the metrics in profile_events and start
+  /// counting.
+  ///
+  /// The work is done at most once per process: the outcome is kept in
+  /// function-local statics and every later call just returns it. Each
+  /// failure prints one diagnostic line to stdout.
+  ///
+  /// NOTE(review): the "done" flag is shared by all instances while `regs`
+  /// and `counter_map` are per-instance, so only the instance that performs
+  /// the first call gets its buffers filled — confirm that a single instance
+  /// per process is intended.
+  ///
+  /// @return true when counting was started successfully.
+  inline bool setup_performance_counters() {
+    static bool init = false;
+    static bool worked = false;
+
+    if (init) {
+      return worked;
+    }
+    init = true;
+
+    // load dylib
+    if (!lib_init()) {
+      printf("Error: %s\n", lib_err_msg);
+      return (worked = false);
+    }
+
+    // check permission
+    int force_ctrs = 0;
+    if (kpc_force_all_ctrs_get(&force_ctrs)) {
+      printf("Permission denied, xnu/kpc requires root privileges.\n");
+      return (worked = false);
+    }
+
+    // The database and the config are only needed while the register
+    // configuration is being computed; `regs` and `counter_map` receive
+    // copies of the results. This guard releases both handles on every exit
+    // path (config first, since it was created from the database), which the
+    // previous code never did.
+    struct kpep_handles {
+      kpep_db *db = NULL;
+      kpep_config *cfg = NULL;
+      ~kpep_handles() {
+        if (cfg) kpep_config_free(cfg);
+        if (db) kpep_db_free(db);
+      }
+    } handles;
+    kpep_db *&db = handles.db;
+    kpep_config *&cfg = handles.cfg;
+
+    int ret;
+    // load pmc db
+    if ((ret = kpep_db_create(NULL, &db))) {
+      printf("Error: cannot load pmc database: %d.\n", ret);
+      return (worked = false);
+    }
+
+    // create a config
+    if ((ret = kpep_config_create(db, &cfg))) {
+      printf("Failed to create kpep config: %d (%s).\n", ret,
+             kpep_config_error_desc(ret));
+      return (worked = false);
+    }
+    if ((ret = kpep_config_force_counters(cfg))) {
+      printf("Failed to force counters: %d (%s).\n", ret,
+             kpep_config_error_desc(ret));
+      return (worked = false);
+    }
+
+    // get events: every metric must resolve, otherwise give up
+    kpep_event *ev_arr[ev_count] = {0};
+    for (usize i = 0; i < ev_count; i++) {
+      const event_alias *alias = profile_events + i;
+      ev_arr[i] = get_event(db, alias);
+      if (!ev_arr[i]) {
+        printf("Cannot find event: %s.\n", alias->alias);
+        return (worked = false);
+      }
+    }
+
+    // add event to config, in profile_events order
+    for (usize i = 0; i < ev_count; i++) {
+      kpep_event *ev = ev_arr[i];
+      if ((ret = kpep_config_add_event(cfg, &ev, 0, NULL))) {
+        printf("Failed to add event: %d (%s).\n", ret,
+               kpep_config_error_desc(ret));
+        return (worked = false);
+      }
+    }
+
+    // prepare buffer and config
+    u32 classes = 0;
+    usize reg_count = 0;
+    if ((ret = kpep_config_kpc_classes(cfg, &classes))) {
+      printf("Failed get kpc classes: %d (%s).\n", ret,
+             kpep_config_error_desc(ret));
+      return (worked = false);
+    }
+    if ((ret = kpep_config_kpc_count(cfg, &reg_count))) {
+      printf("Failed get kpc count: %d (%s).\n", ret,
+             kpep_config_error_desc(ret));
+      return (worked = false);
+    }
+    if ((ret = kpep_config_kpc_map(cfg, counter_map, sizeof(counter_map)))) {
+      printf("Failed get kpc map: %d (%s).\n", ret,
+             kpep_config_error_desc(ret));
+      return (worked = false);
+    }
+    if ((ret = kpep_config_kpc(cfg, regs, sizeof(regs)))) {
+      printf("Failed get kpc registers: %d (%s).\n", ret,
+             kpep_config_error_desc(ret));
+      return (worked = false);
+    }
+
+    // set config to kernel
+    if ((ret = kpc_force_all_ctrs_set(1))) {
+      printf("Failed force all ctrs: %d.\n", ret);
+      return (worked = false);
+    }
+    // Register configs are only pushed when configurable counters are in use.
+    if ((classes & KPC_CLASS_CONFIGURABLE_MASK) && reg_count) {
+      if ((ret = kpc_set_config(classes, regs))) {
+        printf("Failed set kpc config: %d.\n", ret);
+        return (worked = false);
+      }
+    }
+
+    // start counting
+    if ((ret = kpc_set_counting(classes))) {
+      printf("Failed set counting: %d.\n", ret);
+      return (worked = false);
+    }
+    if ((ret = kpc_set_thread_counting(classes))) {
+      printf("Failed set thread counting: %d.\n", ret);
+      return (worked = false);
+    }
+
+    return (worked = true);
+  }
+
+  /// Sample the current thread's counters into counters_0 and package the
+  /// four metrics.
+  ///
+  /// On a read failure a warning is printed once per process and a
+  /// performance_counters built from the value 1 is returned.
+  ///
+  /// NOTE(review): the initializer passes the counters for metric 0, 3, 2, 1
+  /// (cycles, branch-misses, branches, instructions in profile_events order);
+  /// confirm this matches the member order of performance_counters.
+  inline performance_counters get_counters() {
+    static bool warned = false;
+    int ret;
+    // get counters before
+    if ((ret = kpc_get_thread_counters(0, KPC_MAX_COUNTERS, counters_0))) {
+      if (!warned) {
+        printf("Failed get thread counters before: %d.\n", ret);
+        warned = true;
+      }
+      return 1;
+    }
+    return performance_counters{
+        counters_0[counter_map[0]], counters_0[counter_map[3]],
+        counters_0[counter_map[2]], counters_0[counter_map[1]]};
+  }
+};
+
+#endif
diff --git a/microbenchmarks/performancecounters/event_counter.h b/microbenchmarks/performancecounters/event_counter.h
new file mode 100644
index 000000000..63e605690
--- /dev/null
+++ b/microbenchmarks/performancecounters/event_counter.h
@@ -0,0 +1,150 @@
+#ifndef __EVENT_COUNTER_H
+#define __EVENT_COUNTER_H
+
+#include <cctype>
+#ifndef _MSC_VER
+#include <dirent.h>
+#endif
+#include <cinttypes>
+
+#include <cstring>
+
+#include <chrono>
+#include <vector>
+
+#include "linux-perf-events.h"
+#ifdef __linux__
+#include <libgen.h>
+#endif
+
+#if __APPLE__ && __aarch64__
+#include "apple_arm_events.h"
+#endif
+
+/// One measurement: wall-clock time plus a vector of raw event counters.
+///
+/// Copy and move operations are the compiler-generated ones (the previous
+/// hand-written copy constructor and copy assignment did the same member-wise
+/// copy but suppressed the implicit move operations).
+struct event_count {
+  std::chrono::duration<double> elapsed;         ///< Elapsed time in seconds.
+  std::vector<unsigned long long> event_counts;  ///< Raw counters, by slot.
+
+  /// Zero elapsed time and five zeroed counter slots.
+  event_count() : elapsed(0), event_counts{0, 0, 0, 0, 0} {}
+  /// @param _elapsed      Elapsed time.
+  /// @param _event_counts Counter values; copied into the object.
+  event_count(const std::chrono::duration<double> _elapsed,
+              const std::vector<unsigned long long>& _event_counts)
+      : elapsed(_elapsed), event_counts(_event_counts) {}
+
+  // The types of counters (so we can read the getter more easily)
+  enum event_counter_types {
+    CPU_CYCLES,
+    INSTRUCTIONS,
+  };
+
+  /// Elapsed time in seconds.
+  double elapsed_sec() const {
+    return std::chrono::duration<double>(elapsed).count();
+  }
+  /// Elapsed time in nanoseconds.
+  double elapsed_ns() const {
+    return std::chrono::duration<double, std::nano>(elapsed).count();
+  }
+  /// Counter in the CPU_CYCLES slot; the vector must have at least one entry.
+  double cycles() const {
+    return static_cast<double>(event_counts[CPU_CYCLES]);
+  }
+  /// Counter in the INSTRUCTIONS slot; the vector must have at least two
+  /// entries.
+  double instructions() const {
+    return static_cast<double>(event_counts[INSTRUCTIONS]);
+  }
+
+  /// Accumulate `other` into this object: elapsed times are added and the
+  /// counters are added slot by slot. If `other` has more slots than this
+  /// object, the missing slots are created as zero first, so no slot is ever
+  /// read or written out of bounds.
+  /// @return *this
+  event_count& operator+=(const event_count& other) {
+    elapsed += other.elapsed;
+    const std::size_t other_slots = other.event_counts.size();
+    if (event_counts.size() < other_slots) {
+      event_counts.resize(other_slots, 0);
+    }
+    for (std::size_t i = 0; i < other_slots; i++) {
+      event_counts[i] += other.event_counts[i];
+    }
+    return *this;
+  }
+
+  /// Slot-wise sum of two measurements (see operator+=).
+  event_count operator+(const event_count& other) const {
+    event_count sum(*this);
+    sum += other;
+    return sum;
+  }
+};
+
+/// Running statistics over a series of event_count samples: the sum of all
+/// samples plus the samples with the shortest and the longest elapsed time.
+struct event_aggregate {
+  bool has_events = false;  ///< Not modified by any member of this struct.
+  int iterations = 0;       ///< Number of samples recorded so far.
+  event_count total{};      ///< Sum of all recorded samples.
+  event_count best{};       ///< Sample with the smallest elapsed time.
+  event_count worst{};      ///< Sample with the largest elapsed time.
+
+  event_aggregate() = default;
+
+  /// Record one sample. The first sample becomes both `best` and `worst`;
+  /// later samples replace them only when strictly faster / slower.
+  void operator<<(const event_count& other) {
+    const bool first_sample = (iterations == 0);
+    if (first_sample || other.elapsed < best.elapsed) {
+      best = other;
+    }
+    if (first_sample || worst.elapsed < other.elapsed) {
+      worst = other;
+    }
+    total += other;
+    ++iterations;
+  }
+
+  // Per-sample averages: the accumulated value divided by `iterations`.
+  double elapsed_sec() const { return mean_of(total.elapsed_sec()); }
+  double elapsed_ns() const { return mean_of(total.elapsed_ns()); }
+  double cycles() const { return mean_of(total.cycles()); }
+  double instructions() const { return mean_of(total.instructions()); }
+
+  /// Divide an accumulated value by the number of recorded samples.
+  double mean_of(double accumulated) const { return accumulated / iterations; }
+};
+
+/// Measures one region of code: call start() before it and end() after it.
+/// end() returns the elapsed wall-clock time together with whatever hardware
+/// counters the platform backend provides (Linux perf events, Apple kpc on
+/// arm64, or nothing elsewhere).
+///
+/// The Linux backend is selected with `__linux__` everywhere; start()/end()
+/// previously tested `__linux`, which could disagree with the member
+/// declaration below and silently skip the counter reads.
+struct event_collector {
+  event_count count{};  ///< Result of the most recent start()/end() pair.
+  std::chrono::time_point<std::chrono::steady_clock> start_clock{};
+
+#if defined(__linux__)
+  LinuxEvents<PERF_TYPE_HARDWARE> linux_events;
+  event_collector()
+      : linux_events(std::vector<int>{
+            PERF_COUNT_HW_CPU_CYCLES,
+            PERF_COUNT_HW_INSTRUCTIONS,
+        }) {}
+  /// @return whether the perf-events backend reports itself as working.
+  bool has_events() { return linux_events.is_working(); }
+#elif __APPLE__ && __aarch64__
+  AppleEvents apple_events;
+  performance_counters diff;  ///< Sample at start(); the delta after end().
+  event_collector() : diff(0) { apple_events.setup_performance_counters(); }
+  /// @return whether the counter setup succeeded.
+  bool has_events() { return apple_events.setup_performance_counters(); }
+#else
+  event_collector() {}
+  /// No counter backend on this platform.
+  bool has_events() { return false; }
+#endif
+
+  /// Begin a measurement. The clock is read last so that the time spent
+  /// starting the counters is not included in the elapsed time.
+  inline void start() {
+#if defined(__linux__)
+    linux_events.start();
+#elif __APPLE__ && __aarch64__
+    if (has_events()) {
+      diff = apple_events.get_counters();
+    }
+#endif
+    start_clock = std::chrono::steady_clock::now();
+  }
+
+  /// Finish the measurement started by start(). The clock is read first so
+  /// that the time spent reading the counters is not included.
+  /// @return Reference to `count`, valid until the next start()/end() pair.
+  inline event_count& end() {
+    const auto end_clock = std::chrono::steady_clock::now();
+#if defined(__linux__)
+    linux_events.end(count.event_counts);
+#elif __APPLE__ && __aarch64__
+    if (has_events()) {
+      performance_counters end = apple_events.get_counters();
+      diff = end - diff;
+    }
+    // Slot layout on Apple: 0 cycles, 1 instructions, 2 missed branches,
+    // 3 always zero, 4 branches.
+    count.event_counts[0] = diff.cycles;
+    count.event_counts[1] = diff.instructions;
+    count.event_counts[2] = diff.missed_branches;
+    count.event_counts[3] = 0;
+    count.event_counts[4] = diff.branches;
+#endif
+    count.elapsed = end_clock - start_clock;
+    return count;
+  }
+};
+
+#endif
diff --git a/microbenchmarks/performancecounters/ibireme.h b/microbenchmarks/performancecounters/ibireme.h
new file mode 100644
index 000000000..363d5d03b
--- /dev/null
+++ b/microbenchmarks/performancecounters/ibireme.h
@@ -0,0 +1,917 @@
+// =============================================================================
+// XNU kperf/kpc
+// Available for 64-bit Intel/Apple Silicon, macOS/iOS, with root privileges
+//
+// References:
+//
+// XNU source (since xnu 2422.1.72):
+// https://github.com/apple/darwin-xnu/blob/main/osfmk/kern/kpc.h
+// https://github.com/apple/darwin-xnu/blob/main/bsd/kern/kern_kpc.c
+//
+// Lightweight PET (Profile Every Thread, since xnu 3789.1.32):
+// https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/pet.c
+// https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/kperf_kpc.c
+//
+// System Private frameworks (since macOS 10.11, iOS 8.0):
+// /System/Library/PrivateFrameworks/kperf.framework
+// /System/Library/PrivateFrameworks/kperfdata.framework
+//
+// Xcode framework (since Xcode 7.0):
+// /Applications/Xcode.app/Contents/SharedFrameworks/DVTInstrumentsFoundation.framework
+//
+// CPU database (plist files)
+// macOS (since macOS 10.11):
+//     /usr/share/kpep/<name>.plist
+// iOS (copied from Xcode, since iOS 10.0, Xcode 8.0):
+//     /Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform
+//     /DeviceSupport/<version>/DeveloperDiskImage.dmg/usr/share/kpep/<name>.plist
+//
+//
+// Created by YaoYuan <ibireme@gmail.com> on 2021.
+// Released into the public domain (unlicense.org).
+// =============================================================================
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <dlfcn.h>          // for dlopen() and dlsym()
+#include <mach/mach_time.h> // for mach_absolute_time()
+#include <sys/kdebug.h>     // for kdebug trace decode
+#include <sys/sysctl.h>     // for sysctl()
+#include <unistd.h>         // for usleep()
+
+typedef float f32;
+typedef double f64;
+typedef int8_t i8;
+typedef uint8_t u8;
+typedef int16_t i16;
+typedef uint16_t u16;
+typedef int32_t i32;
+typedef uint32_t u32;
+typedef int64_t i64;
+typedef uint64_t u64;
+typedef size_t usize;
+
+// -----------------------------------------------------------------------------
+// <kperf.framework> header (reverse engineered)
+// This framework wraps some sysctl calls to communicate with the kpc in kernel.
+// Most functions require root privileges, or the process must be "blessed".
+// -----------------------------------------------------------------------------
+
+// Cross-platform class constants.
+#define KPC_CLASS_FIXED (0)
+#define KPC_CLASS_CONFIGURABLE (1)
+#define KPC_CLASS_POWER (2)
+#define KPC_CLASS_RAWPMU (3)
+
+// Cross-platform class mask constants.
+#define KPC_CLASS_FIXED_MASK (1u << KPC_CLASS_FIXED)               // 1
+#define KPC_CLASS_CONFIGURABLE_MASK (1u << KPC_CLASS_CONFIGURABLE) // 2
+#define KPC_CLASS_POWER_MASK (1u << KPC_CLASS_POWER)               // 4
+#define KPC_CLASS_RAWPMU_MASK (1u << KPC_CLASS_RAWPMU)             // 8
+
+// PMU version constants.
+#define KPC_PMU_ERROR (0)     // Error
+#define KPC_PMU_INTEL_V3 (1)  // Intel
+#define KPC_PMU_ARM_APPLE (2) // ARM64
+#define KPC_PMU_INTEL_V2 (3)  // Old Intel
+#define KPC_PMU_ARM_V2 (4)    // Old ARM
+
+// The maximum number of counters we could read from every class in one go.
+// ARMV7: FIXED: 1, CONFIGURABLE: 4
+// ARM32: FIXED: 2, CONFIGURABLE: 6
+// ARM64: FIXED: 2, CONFIGURABLE: CORE_NCTRS - FIXED (6 or 8)
+// x86: 32
+#define KPC_MAX_COUNTERS 32
+
+// Bits for defining what to do on an action.
+// Defined in https://github.com/apple/darwin-xnu/blob/main/osfmk/kperf/action.h
+#define KPERF_SAMPLER_TH_INFO (1U << 0)
+#define KPERF_SAMPLER_TH_SNAPSHOT (1U << 1)
+#define KPERF_SAMPLER_KSTACK (1U << 2)
+#define KPERF_SAMPLER_USTACK (1U << 3)
+#define KPERF_SAMPLER_PMC_THREAD (1U << 4)
+#define KPERF_SAMPLER_PMC_CPU (1U << 5)
+#define KPERF_SAMPLER_PMC_CONFIG (1U << 6)
+#define KPERF_SAMPLER_MEMINFO (1U << 7)
+#define KPERF_SAMPLER_TH_SCHEDULING (1U << 8)
+#define KPERF_SAMPLER_TH_DISPATCH (1U << 9)
+#define KPERF_SAMPLER_TK_SNAPSHOT (1U << 10)
+#define KPERF_SAMPLER_SYS_MEM (1U << 11)
+#define KPERF_SAMPLER_TH_INSCYC (1U << 12)
+#define KPERF_SAMPLER_TK_INFO (1U << 13)
+
+// Maximum number of kperf action ids.
+#define KPERF_ACTION_MAX (32)
+
+// Maximum number of kperf timer ids.
+#define KPERF_TIMER_MAX (8)
+
+// x86/arm config registers are 64-bit
+typedef u64 kpc_config_t;
+
+/// Print current CPU identification string to the buffer (same as snprintf),
+/// such as "cpu_7_8_10b282dc_46". This string can be used to locate the PMC
+/// database in /usr/share/kpep.
+/// @return string's length, or negative value if error occurs.
+/// @note This method does not require root privileges.
+/// @details sysctl get(hw.cputype), get(hw.cpusubtype),
+///                 get(hw.cpufamily), get(machdep.cpu.model)
+static int (*kpc_cpu_string)(char *buf, usize buf_size);
+
+/// Get the version of KPC that's being run.
+/// @return See `PMU version constants` above.
+/// @details sysctl get(kpc.pmu_version)
+static u32 (*kpc_pmu_version)(void);
+
+/// Get running PMC classes.
+/// @return See `class mask constants` above,
+///         0 if error occurs or no class is set.
+/// @details sysctl get(kpc.counting)
+static u32 (*kpc_get_counting)(void);
+
+/// Set PMC classes to enable counting.
+/// @param classes See `class mask constants` above, set 0 to shutdown counting.
+/// @return 0 for success.
+/// @details sysctl set(kpc.counting)
+static int (*kpc_set_counting)(u32 classes);
+
+/// Get running PMC classes for current thread.
+/// @return See `class mask constants` above,
+///         0 if error occurs or no class is set.
+/// @details sysctl get(kpc.thread_counting)
+static u32 (*kpc_get_thread_counting)(void);
+
+/// Set PMC classes to enable counting for current thread.
+/// @param classes See `class mask constants` above, set 0 to shutdown counting.
+/// @return 0 for success.
+/// @details sysctl set(kpc.thread_counting)
+static int (*kpc_set_thread_counting)(u32 classes);
+
+/// Get how many config registers there are for a given mask.
+/// For example: Intel may return 1 for `KPC_CLASS_FIXED_MASK`,
+///                        and 4 for `KPC_CLASS_CONFIGURABLE_MASK`.
+/// @param classes See `class mask constants` above.
+/// @return 0 if an error occurs or no class is set.
+/// @note This method does not require root privileges.
+/// @details sysctl get(kpc.config_count)
+static u32 (*kpc_get_config_count)(u32 classes);
+
+/// Get config registers.
+/// @param classes see `class mask constants` above.
+/// @param config Config buffer to receive values; must hold at least
+///               kpc_get_config_count(classes) * sizeof(kpc_config_t) bytes.
+/// @return 0 for success.
+/// @details sysctl get(kpc.config_count), get(kpc.config)
+static int (*kpc_get_config)(u32 classes, kpc_config_t *config);
+
+/// Set config registers.
+/// @param classes see `class mask constants` above.
+/// @param config Config buffer; must hold at least
+///               kpc_get_config_count(classes) * sizeof(kpc_config_t) bytes.
+/// @return 0 for success.
+/// @details sysctl get(kpc.config_count), set(kpc.config)
+static int (*kpc_set_config)(u32 classes, kpc_config_t *config);
+
+/// Get how many counters there are for a given mask.
+/// For example: Intel may return 3 for `KPC_CLASS_FIXED_MASK`,
+///                        and 4 for `KPC_CLASS_CONFIGURABLE_MASK`.
+/// @param classes See `class mask constants` above.
+/// @note This method does not require root privileges.
+/// @details sysctl get(kpc.counter_count)
+static u32 (*kpc_get_counter_count)(u32 classes);
+
+/// Get counter accumulations.
+/// If `all_cpus` is true, the buffer count should not be smaller than
+/// (cpu_count * counter_count). Otherwise, the buffer count should not be
+/// smaller than (counter_count).
+/// @see kpc_get_counter_count(), kpc_cpu_count().
+/// @param all_cpus true for all CPUs, false for current cpu.
+/// @param classes See `class mask constants` above.
+/// @param curcpu A pointer to receive current cpu id, can be NULL.
+/// @param buf Buffer to receive counter's value.
+/// @return 0 for success.
+/// @details sysctl get(hw.ncpu), get(kpc.counter_count), get(kpc.counters)
+static int (*kpc_get_cpu_counters)(bool all_cpus, u32 classes, int *curcpu,
+                                   u64 *buf);
+
+/// Get counter accumulations for current thread.
+/// @param tid Thread id, should be 0.
+/// @param buf_count The number of buf's elements (not bytes),
+///                  should not be smaller than kpc_get_counter_count().
+/// @param buf Buffer to receive counter's value.
+/// @return 0 for success.
+/// @details sysctl get(kpc.thread_counters)
+static int (*kpc_get_thread_counters)(u32 tid, u32 buf_count, u64 *buf);
+
+/// Acquire/release the counters used by the Power Manager.
+/// @param val 1:acquire, 0:release
+/// @return 0 for success.
+/// @details sysctl set(kpc.force_all_ctrs)
+static int (*kpc_force_all_ctrs_set)(int val);
+
+/// Get the state of all_ctrs.
+/// @return 0 for success.
+/// @details sysctl get(kpc.force_all_ctrs)
+static int (*kpc_force_all_ctrs_get)(int *val_out);
+
+/// Set number of actions, should be `KPERF_ACTION_MAX`.
+/// @details sysctl set(kperf.action.count)
+static int (*kperf_action_count_set)(u32 count);
+
+/// Get number of actions.
+/// @details sysctl get(kperf.action.count)
+static int (*kperf_action_count_get)(u32 *count);
+
+/// Set what to sample when a trigger fires an action, e.g.
+/// `KPERF_SAMPLER_PMC_CPU`.
+/// @details sysctl set(kperf.action.samplers)
+static int (*kperf_action_samplers_set)(u32 actionid, u32 sample);
+
+/// Get what to sample when a trigger fires an action.
+/// @details sysctl get(kperf.action.samplers)
+static int (*kperf_action_samplers_get)(u32 actionid, u32 *sample);
+
+/// Apply a task filter to the action, -1 to disable filter.
+/// @details sysctl set(kperf.action.filter_by_task)
+static int (*kperf_action_filter_set_by_task)(u32 actionid, i32 port);
+
+/// Apply a pid filter to the action, -1 to disable filter.
+/// @details sysctl set(kperf.action.filter_by_pid)
+static int (*kperf_action_filter_set_by_pid)(u32 actionid, i32 pid);
+
+/// Set number of time triggers, should be `KPERF_TIMER_MAX`.
+/// @details sysctl set(kperf.timer.count)
+static int (*kperf_timer_count_set)(u32 count);
+
+/// Get number of time triggers.
+/// @details sysctl get(kperf.timer.count)
+static int (*kperf_timer_count_get)(u32 *count);
+
+/// Set timer number and period.
+/// @details sysctl set(kperf.timer.period)
+static int (*kperf_timer_period_set)(u32 actionid, u64 tick);
+
+/// Get timer number and period.
+/// @details sysctl get(kperf.timer.period)
+static int (*kperf_timer_period_get)(u32 actionid, u64 *tick);
+
+/// Set timer number and actionid.
+/// @details sysctl set(kperf.timer.action)
+static int (*kperf_timer_action_set)(u32 actionid, u32 timerid);
+
+/// Get timer number and actionid.
+/// @details sysctl get(kperf.timer.action)
+static int (*kperf_timer_action_get)(u32 actionid, u32 *timerid);
+
+/// Set which timer ID does PET (Profile Every Thread).
+/// @details sysctl set(kperf.timer.pet_timer)
+static int (*kperf_timer_pet_set)(u32 timerid);
+
+/// Get which timer ID does PET (Profile Every Thread).
+/// @details sysctl get(kperf.timer.pet_timer)
+static int (*kperf_timer_pet_get)(u32 *timerid);
+
+/// Enable or disable sampling.
+/// @details sysctl set(kperf.sampling)
+static int (*kperf_sample_set)(u32 enabled);
+
+/// Get whether sampling is currently enabled.
+/// @details sysctl get(kperf.sampling)
+static int (*kperf_sample_get)(u32 *enabled);
+
+/// Reset kperf: stop sampling, kdebug, timers and actions.
+/// @return 0 for success.
+static int (*kperf_reset)(void);
+
+/// Nanoseconds to CPU ticks.
+static u64 (*kperf_ns_to_ticks)(u64 ns);
+
+/// CPU ticks to nanoseconds.
+static u64 (*kperf_ticks_to_ns)(u64 ticks);
+
+/// CPU ticks frequency (mach_absolute_time).
+static u64 (*kperf_tick_frequency)(void);
+
+/// Read the lightweight PET flag via sysctl (not exposed by kperf.framework).
+static int kperf_lightweight_pet_get(u32 *enabled) {
+  usize value_len = 4; // in/out byte length handed to sysctlbyname
+  if (enabled == NULL)
+    return -1; // nowhere to store the value
+  return sysctlbyname("kperf.lightweight_pet", enabled, &value_len, NULL, 0);
+}
+
+/// Write the lightweight PET flag via sysctl (not exposed by kperf.framework).
+static int kperf_lightweight_pet_set(u32 enabled) {
+  return sysctlbyname("kperf.lightweight_pet", NULL, NULL, &enabled, sizeof(u32));
+}
+
+// -----------------------------------------------------------------------------
+// <kperfdata.framework> header (reverse engineered)
+// This framework provides some functions to access the local CPU database.
+// These functions do not require root privileges.
+// -----------------------------------------------------------------------------
+
+// KPEP CPU archtecture constants.
+#define KPEP_ARCH_I386 0
+#define KPEP_ARCH_X86_64 1
+#define KPEP_ARCH_ARM 2
+#define KPEP_ARCH_ARM64 3
+
+/// KPEP event (size: 48/28 bytes on 64/32 bit OS)
+typedef struct kpep_event {
+  const char *name; ///< Unique name of a event, such as "INST_RETIRED.ANY".
+  const char *description; ///< Description for this event.
+  const char *errata;      ///< Errata, currently NULL.
+  const char *alias;       ///< Alias name, such as "Instructions", "Cycles".
+  const char *fallback;    ///< Fallback event name for fixed counter.
+  u32 mask;
+  u8 number;
+  u8 umask;
+  u8 reserved;
+  u8 is_fixed;
+} kpep_event;
+
+/// KPEP database (size: 144/80 bytes on 64/32 bit OS)
+typedef struct kpep_db {
+  const char *name;           ///< Database name, such as "haswell".
+  const char *cpu_id;         ///< Plist name, such as "cpu_7_8_10b282dc".
+  const char *marketing_name; ///< Marketing name, such as "Intel Haswell".
+  void *plist_data;           ///< Plist data (CFDataRef), currently NULL.
+  void *event_map; ///< All events (CFDict<CFSTR(event_name), kpep_event *>).
+  kpep_event
+      *event_arr; ///< Event struct buffer (sizeof(kpep_event) * events_count).
+  kpep_event **fixed_event_arr; ///< Fixed counter events (sizeof(kpep_event *)
+                                ///< * fixed_counter_count)
+  void *alias_map; ///< All aliases (CFDict<CFSTR(event_name), kpep_event *>).
+  usize reserved_1;
+  usize reserved_2;
+  usize reserved_3;
+  usize event_count; ///< All events count.
+  usize alias_count;
+  usize fixed_counter_count;
+  usize config_counter_count;
+  usize power_counter_count;
+  u32 archtecture; ///< see `KPEP CPU archtecture constants` above.
+  u32 fixed_counter_bits;
+  u32 config_counter_bits;
+  u32 power_counter_bits;
+} kpep_db;
+
+/// KPEP config (size: 80/44 bytes on 64/32 bit OS)
+typedef struct kpep_config {
+  kpep_db *db;
+  kpep_event **ev_arr; ///< (sizeof(kpep_event *) * counter_count), init NULL
+  usize *ev_map;       ///< (sizeof(usize *) * counter_count), init 0
+  usize *ev_idx;       ///< (sizeof(usize *) * counter_count), init -1
+  u32 *flags;          ///< (sizeof(u32 *) * counter_count), init 0
+  u64 *kpc_periods;    ///< (sizeof(u64 *) * counter_count), init 0
+  usize event_count;   /// kpep_config_events_count()
+  usize counter_count;
+  u32 classes; ///< See `class mask constants` above.
+  u32 config_counter;
+  u32 power_counter;
+  u32 reserved;
+} kpep_config;
+
+/// Error code for kpep_config_xxx() and kpep_db_xxx() functions.
+typedef enum {
+  KPEP_CONFIG_ERROR_NONE = 0,
+  KPEP_CONFIG_ERROR_INVALID_ARGUMENT = 1,
+  KPEP_CONFIG_ERROR_OUT_OF_MEMORY = 2,
+  KPEP_CONFIG_ERROR_IO = 3,
+  KPEP_CONFIG_ERROR_BUFFER_TOO_SMALL = 4,
+  KPEP_CONFIG_ERROR_CUR_SYSTEM_UNKNOWN = 5,
+  KPEP_CONFIG_ERROR_DB_PATH_INVALID = 6,
+  KPEP_CONFIG_ERROR_DB_NOT_FOUND = 7,
+  KPEP_CONFIG_ERROR_DB_ARCH_UNSUPPORTED = 8,
+  KPEP_CONFIG_ERROR_DB_VERSION_UNSUPPORTED = 9,
+  KPEP_CONFIG_ERROR_DB_CORRUPT = 10,
+  KPEP_CONFIG_ERROR_EVENT_NOT_FOUND = 11,
+  KPEP_CONFIG_ERROR_CONFLICTING_EVENTS = 12,
+  KPEP_CONFIG_ERROR_COUNTERS_NOT_FORCED = 13,
+  KPEP_CONFIG_ERROR_EVENT_UNAVAILABLE = 14,
+  KPEP_CONFIG_ERROR_ERRNO = 15,
+  KPEP_CONFIG_ERROR_MAX
+} kpep_config_error_code;
+
+/// Error description for kpep_config_error_code.
+static const char *kpep_config_error_names[KPEP_CONFIG_ERROR_MAX] = {
+    "none",
+    "invalid argument",
+    "out of memory",
+    "I/O",
+    "buffer too small",
+    "current system unknown",
+    "database path invalid",
+    "database not found",
+    "database architecture unsupported",
+    "database version unsupported",
+    "database corrupt",
+    "event not found",
+    "conflicting events",
+    "all counters must be forced",
+    "event unavailable",
+    "check errno"};
+
+/// Map a kpep_config_error_code to its text; out-of-range gives a fallback.
+static const char *kpep_config_error_desc(int code) {
+  if (code < 0 || code >= KPEP_CONFIG_ERROR_MAX) {
+    return "unknown error";
+  }
+  return kpep_config_error_names[code];
+}
+
+/// Create a config.
+/// @param db A kpep db, see kpep_db_create()
+/// @param cfg_ptr A pointer to receive the new config.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_create)(kpep_db *db, kpep_config **cfg_ptr);
+
+/// Free the config.
+static void (*kpep_config_free)(kpep_config *cfg);
+
+/// Add an event to config.
+/// @param cfg The config.
+/// @param ev_ptr A event pointer.
+/// @param flag 0: all, 1: user space only
+/// @param err Error bitmap pointer, can be NULL.
+///            If return value is `CONFLICTING_EVENTS`, this bitmap contains
+///            the conflicted event indices, e.g. "1 << 2" means index 2.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_add_event)(kpep_config *cfg, kpep_event **ev_ptr,
+                                    u32 flag, u32 *err);
+
+/// Remove event at index.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_remove_event)(kpep_config *cfg, usize idx);
+
+/// Force all counters.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_force_counters)(kpep_config *cfg);
+
+/// Get events count.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_events_count)(kpep_config *cfg, usize *count_ptr);
+
+/// Get all event pointers.
+/// @param buf A buffer to receive event pointers.
+/// @param buf_size The buffer's size in bytes, should not smaller than
+///                 kpep_config_events_count() * sizeof(void *).
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_events)(kpep_config *cfg, kpep_event **buf,
+                                 usize buf_size);
+
+/// Get kpc register configs.
+/// @param buf A buffer to receive kpc register configs.
+/// @param buf_size The buffer's size in bytes, should not smaller than
+///                 kpep_config_kpc_count() * sizeof(kpc_config_t).
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_kpc)(kpep_config *cfg, kpc_config_t *buf,
+                              usize buf_size);
+
+/// Get kpc register config count.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_kpc_count)(kpep_config *cfg, usize *count_ptr);
+
+/// Get kpc classes.
+/// @param classes See `class mask constants` above.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_kpc_classes)(kpep_config *cfg, u32 *classes_ptr);
+
+/// Get the index mapping from event to counter.
+/// @param buf A buffer to receive indexes.
+/// @param buf_size The buffer's size in bytes, should not smaller than
+///                 kpep_config_events_count() * sizeof(kpc_config_t).
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_config_kpc_map)(kpep_config *cfg, usize *buf, usize buf_size);
+
+/// Open a kpep database file in "/usr/share/kpep/" or "/usr/local/share/kpep/".
+/// @param name File name, for example "haswell", "cpu_100000c_1_92fb37c8".
+///             Pass NULL for current CPU.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_db_create)(const char *name, kpep_db **db_ptr);
+
+/// Free the kpep database.
+static void (*kpep_db_free)(kpep_db *db);
+
+/// Get the database's name.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_db_name)(kpep_db *db, const char **name);
+
+/// Get the event alias count.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_db_aliases_count)(kpep_db *db, usize *count);
+
+/// Get all alias.
+/// @param buf A buffer to receive all alias strings.
+/// @param buf_size The buffer's size in bytes,
+///        should not smaller than kpep_db_aliases_count() * sizeof(void *).
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_db_aliases)(kpep_db *db, const char **buf, usize buf_size);
+
+/// Get counters count for given classes.
+/// @param classes 1: Fixed, 2: Configurable.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_db_counters_count)(kpep_db *db, u8 classes, usize *count);
+
+/// Get all event count.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_db_events_count)(kpep_db *db, usize *count);
+
+/// Get all events.
+/// @param buf A buffer to receive all event pointers.
+/// @param buf_size The buffer's size in bytes,
+///        should not smaller than kpep_db_events_count() * sizeof(void *).
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_db_events)(kpep_db *db, kpep_event **buf, usize buf_size);
+
+/// Get one event by name.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_db_event)(kpep_db *db, const char *name, kpep_event **ev_ptr);
+
+/// Get event's name.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_event_name)(kpep_event *ev, const char **name_ptr);
+
+/// Get event's alias.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_event_alias)(kpep_event *ev, const char **alias_ptr);
+
+/// Get event's description.
+/// @return kpep_config_error_code, 0 for success.
+static int (*kpep_event_description)(kpep_event *ev, const char **str_ptr);
+
+// -----------------------------------------------------------------------------
+// load kperf/kperfdata dynamic library
+// -----------------------------------------------------------------------------
+
+typedef struct {
+  const char *name;
+  void **impl;
+} lib_symbol;
+
+#define lib_nelems(x) (sizeof(x) / sizeof((x)[0]))
+#define lib_symbol_def(name)                                                   \
+  {                                                                            \
+#name, (void **)&name                                                      \
+  }
+
+static const lib_symbol lib_symbols_kperf[] = {
+    lib_symbol_def(kpc_pmu_version),
+    lib_symbol_def(kpc_cpu_string),
+    lib_symbol_def(kpc_set_counting),
+    lib_symbol_def(kpc_get_counting),
+    lib_symbol_def(kpc_set_thread_counting),
+    lib_symbol_def(kpc_get_thread_counting),
+    lib_symbol_def(kpc_get_config_count),
+    lib_symbol_def(kpc_get_counter_count),
+    lib_symbol_def(kpc_set_config),
+    lib_symbol_def(kpc_get_config),
+    lib_symbol_def(kpc_get_cpu_counters),
+    lib_symbol_def(kpc_get_thread_counters),
+    lib_symbol_def(kpc_force_all_ctrs_set),
+    lib_symbol_def(kpc_force_all_ctrs_get),
+    lib_symbol_def(kperf_action_count_set),
+    lib_symbol_def(kperf_action_count_get),
+    lib_symbol_def(kperf_action_samplers_set),
+    lib_symbol_def(kperf_action_samplers_get),
+    lib_symbol_def(kperf_action_filter_set_by_task),
+    lib_symbol_def(kperf_action_filter_set_by_pid),
+    lib_symbol_def(kperf_timer_count_set),
+    lib_symbol_def(kperf_timer_count_get),
+    lib_symbol_def(kperf_timer_period_set),
+    lib_symbol_def(kperf_timer_period_get),
+    lib_symbol_def(kperf_timer_action_set),
+    lib_symbol_def(kperf_timer_action_get),
+    lib_symbol_def(kperf_sample_set),
+    lib_symbol_def(kperf_sample_get),
+    lib_symbol_def(kperf_reset),
+    lib_symbol_def(kperf_timer_pet_set),
+    lib_symbol_def(kperf_timer_pet_get),
+    lib_symbol_def(kperf_ns_to_ticks),
+    lib_symbol_def(kperf_ticks_to_ns),
+    lib_symbol_def(kperf_tick_frequency),
+};
+
+static const lib_symbol lib_symbols_kperfdata[] = {
+    lib_symbol_def(kpep_config_create),
+    lib_symbol_def(kpep_config_free),
+    lib_symbol_def(kpep_config_add_event),
+    lib_symbol_def(kpep_config_remove_event),
+    lib_symbol_def(kpep_config_force_counters),
+    lib_symbol_def(kpep_config_events_count),
+    lib_symbol_def(kpep_config_events),
+    lib_symbol_def(kpep_config_kpc),
+    lib_symbol_def(kpep_config_kpc_count),
+    lib_symbol_def(kpep_config_kpc_classes),
+    lib_symbol_def(kpep_config_kpc_map),
+    lib_symbol_def(kpep_db_create),
+    lib_symbol_def(kpep_db_free),
+    lib_symbol_def(kpep_db_name),
+    lib_symbol_def(kpep_db_aliases_count),
+    lib_symbol_def(kpep_db_aliases),
+    lib_symbol_def(kpep_db_counters_count),
+    lib_symbol_def(kpep_db_events_count),
+    lib_symbol_def(kpep_db_events),
+    lib_symbol_def(kpep_db_event),
+    lib_symbol_def(kpep_event_name),
+    lib_symbol_def(kpep_event_alias),
+    lib_symbol_def(kpep_event_description),
+};
+
+#define lib_path_kperf "/System/Library/PrivateFrameworks/kperf.framework/kperf"
+#define lib_path_kperfdata                                                     \
+  "/System/Library/PrivateFrameworks/kperfdata.framework/kperfdata"
+
+static bool lib_inited = false;
+static bool lib_has_err = false;
+static char lib_err_msg[256];
+
+static void *lib_handle_kperf = NULL;
+static void *lib_handle_kperfdata = NULL;
+
+/// Close both frameworks (if open) and forget every resolved symbol.
+static void lib_deinit(void) {
+  if (lib_handle_kperf != NULL)
+    dlclose(lib_handle_kperf);
+  if (lib_handle_kperfdata != NULL)
+    dlclose(lib_handle_kperfdata);
+  lib_handle_kperf = NULL;
+  lib_handle_kperfdata = NULL;
+
+  // Back to the "never initialized" state so lib_init() will run again.
+  lib_inited = false;
+  lib_has_err = false;
+
+  for (usize k = 0; k < lib_nelems(lib_symbols_kperf); k++)
+    *lib_symbols_kperf[k].impl = NULL;
+  for (usize k = 0; k < lib_nelems(lib_symbols_kperfdata); k++)
+    *lib_symbols_kperfdata[k].impl = NULL;
+}
+
+static bool lib_init(void) { // dlopen kperf/kperfdata and bind all symbols
+#define return_err()                                                           \
+  do {                                                                         \
+    lib_deinit();                                                              \
+    lib_inited = true;                                                         \
+    lib_has_err = true;                                                        \
+    return false;                                                              \
+  } while (false)
+  // return_err(): undo partial loading, cache the failure, and return false.
+  if (lib_inited)
+    return !lib_has_err;
+  // First call: lib_err_msg is filled in by whichever step fails below.
+  // load dynamic library
+  lib_handle_kperf = dlopen(lib_path_kperf, RTLD_LAZY);
+  if (!lib_handle_kperf) {
+    snprintf(lib_err_msg, sizeof(lib_err_msg),
+             "Failed to load kperf.framework, message: %s.", dlerror());
+    return_err();
+  }
+  lib_handle_kperfdata = dlopen(lib_path_kperfdata, RTLD_LAZY);
+  if (!lib_handle_kperfdata) {
+    snprintf(lib_err_msg, sizeof(lib_err_msg),
+             "Failed to load kperfdata.framework, message: %s.", dlerror());
+    return_err();
+  }
+
+  // load symbol address from dynamic library
+  for (usize i = 0; i < lib_nelems(lib_symbols_kperf); i++) {
+    const lib_symbol *symbol = &lib_symbols_kperf[i];
+    *symbol->impl = dlsym(lib_handle_kperf, symbol->name);
+    if (!*symbol->impl) {
+      snprintf(lib_err_msg, sizeof(lib_err_msg),
+               "Failed to load kperf function: %s.", symbol->name);
+      return_err();
+    }
+  }
+  for (usize i = 0; i < lib_nelems(lib_symbols_kperfdata); i++) {
+    const lib_symbol *symbol = &lib_symbols_kperfdata[i];
+    *symbol->impl = dlsym(lib_handle_kperfdata, symbol->name);
+    if (!*symbol->impl) {
+      snprintf(lib_err_msg, sizeof(lib_err_msg),
+               "Failed to load kperfdata function: %s.", symbol->name);
+      return_err();
+    }
+  }
+
+  lib_inited = true;
+  lib_has_err = false;
+  return true;
+
+#undef return_err
+}
+
+// -----------------------------------------------------------------------------
+// kdebug private structs
+// https://github.com/apple/darwin-xnu/blob/main/bsd/sys_private/kdebug_private.h
+// -----------------------------------------------------------------------------
+
+/*
+ * Ensure that both LP32 and LP64 variants of arm64 use the same kd_buf
+ * structure.
+ */
+#if defined(__arm64__)
+typedef uint64_t kd_buf_argtype;
+#else
+typedef uintptr_t kd_buf_argtype;
+#endif
+
+typedef struct {
+  uint64_t timestamp;
+  kd_buf_argtype arg1;
+  kd_buf_argtype arg2;
+  kd_buf_argtype arg3;
+  kd_buf_argtype arg4;
+  kd_buf_argtype arg5; /* the thread ID */
+  uint32_t debugid;    /* see <sys/kdebug.h> */
+
+/*
+ * Ensure that both LP32 and LP64 variants of arm64 use the same kd_buf
+ * structure.
+ */
+#if defined(__LP64__) || defined(__arm64__)
+  uint32_t cpuid; /* cpu index, from 0 */
+  kd_buf_argtype unused;
+#endif
+} kd_buf;
+
+/* bits for the type field of kd_regtype */
+#define KDBG_CLASSTYPE 0x10000
+#define KDBG_SUBCLSTYPE 0x20000
+#define KDBG_RANGETYPE 0x40000
+#define KDBG_TYPENONE 0x80000
+#define KDBG_CKTYPES 0xF0000
+
+/* only trace at most 4 types of events, at the code granularity */
+#define KDBG_VALCHECK 0x00200000U
+
+typedef struct {
+  unsigned int type;
+  unsigned int value1;
+  unsigned int value2;
+  unsigned int value3;
+  unsigned int value4;
+} kd_regtype;
+
+typedef struct {
+  /* number of events that can fit in the buffers */
+  int nkdbufs;
+  /* set if trace is disabled */
+  int nolog;
+  /* kd_ctrl_page.flags */
+  unsigned int flags;
+  /* number of threads in thread map */
+  int nkdthreads;
+  /* the owning pid */
+  int bufid;
+} kbufinfo_t;
+
+// -----------------------------------------------------------------------------
+// kdebug utils
+// -----------------------------------------------------------------------------
+
+/// Issue the kern.kdebug KERN_KDREMOVE request (trace buffer cleanup/reset).
+/// @return sysctl()'s result: 0 on success.
+static int kdebug_reset(void) {
+  int request[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDREMOVE};
+  return sysctl(request, 3, NULL, NULL, NULL, 0);
+}
+
+/// Issue the kern.kdebug KERN_KDSETUP request (disable + reinit trace buffers).
+/// @return sysctl()'s result: 0 on success.
+static int kdebug_reinit(void) {
+  int request[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETUP};
+  return sysctl(request, 3, NULL, NULL, NULL, 0);
+}
+
+/// Install the kdebug filter in `kdr` via KERN_KDSETREG; returns sysctl's result.
+static int kdebug_setreg(kd_regtype *kdr) {
+  usize kdr_bytes = sizeof(*kdr);
+  int request[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETREG};
+  return sysctl(request, 3, kdr, &kdr_bytes, NULL, 0);
+}
+
+/// Request room for `nbufs` trace entries (kd_buf) via KERN_KDSETBUF.
+/// Per the upstream note, allocation is capped at half of memory (sane_size).
+/// @return sysctl()'s result: 0 on success.
+static int kdebug_trace_setbuf(int nbufs) {
+  int request[4] = {CTL_KERN, KERN_KDEBUG, KERN_KDSETBUF, nbufs};
+  return sysctl(request, 4, NULL, NULL, NULL, 0);
+}
+
+/// Turn kdebug tracing on or off via KERN_KDENABLE.
+/// The trace buffer must have been initialized beforehand.
+/// @return sysctl()'s result: 0 on success.
+static int kdebug_trace_enable(bool enable) {
+  int request[4] = {CTL_KERN, KERN_KDEBUG, KERN_KDENABLE, enable};
+  return sysctl(request, 4, NULL, NULL, NULL, 0);
+}
+
+/// Fill `info` with the kernel's trace buffer state (KERN_KDGETBUF).
+/// @return 0 on success, -1 when `info` is NULL, else sysctl()'s result.
+static int kdebug_get_bufinfo(kbufinfo_t *info) {
+  if (info == NULL)
+    return -1;
+  size_t info_bytes = sizeof(*info);
+  int request[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDGETBUF};
+  return sysctl(request, 3, info, &info_bytes, NULL, 0);
+}
+
+/// Retrieve trace buffers from kernel.
+/// @param buf Memory to receive buffer data, array of `kd_buf`.
+/// @param len Length of `buf` in bytes.
+/// @param count Receives the number of kd_buf entries read; may be NULL.
+/// @return 0 on success; -1 for NULL `buf`/zero `len`; else sysctl()'s result.
+static int kdebug_trace_read(void *buf, usize len, usize *count) {
+  if (count)
+    *count = 0;
+  if (!buf || !len)
+    return -1;
+
+  // Note: the input and output units are not the same.
+  // input: bytes
+  // output: number of kd_buf
+  int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDREADTR};
+  int ret = sysctl(mib, 3, buf, &len, NULL, 0);
+  if (ret != 0)
+    return ret;
+  if (count) // optional out-parameter: the guard above already allows NULL
+    *count = len;
+  return 0;
+}
+
+/// Block until there are new buffers filled or `timeout_ms` have passed.
+/// @param timeout_ms timeout in milliseconds; 0 is rejected (returns -1).
+/// @param suc if non-NULL, set true if new buffers filled.
+/// @return 0 on success.
+static int kdebug_wait(usize timeout_ms, bool *suc) {
+  if (timeout_ms == 0)
+    return -1;
+  int mib[3] = {CTL_KERN, KERN_KDEBUG, KERN_KDBUFWAIT};
+  usize val = timeout_ms; // passed by address; its value after the call drives *suc
+  int ret = sysctl(mib, 3, NULL, &val, NULL, 0);
+  if (suc)
+    *suc = !!val;
+  return ret;
+}
+
+// -----------------------------------------------------------------------------
+// Demo
+// -----------------------------------------------------------------------------
+
+#define EVENT_NAME_MAX 8
+typedef struct {
+  const char *alias;                 /// name for print
+  const char *names[EVENT_NAME_MAX]; /// name from pmc db
+} event_alias;
+
+/// Event names from /usr/share/kpep/<name>.plist
+static const event_alias profile_events[] = {
+    {"cycles",
+     {
+         "FIXED_CYCLES",            // Apple A7-A15
+         "CPU_CLK_UNHALTED.THREAD", // Intel Core 1th-10th
+         "CPU_CLK_UNHALTED.CORE",   // Intel Yonah, Merom
+     }},
+    {"instructions",
+     {
+         "FIXED_INSTRUCTIONS", // Apple A7-A15
+         "INST_RETIRED.ANY"    // Intel Yonah, Merom, Core 1th-10th
+     }},
+    {"branches",
+     {
+         "INST_BRANCH",                  // Apple A7-A15
+         "BR_INST_RETIRED.ALL_BRANCHES", // Intel Core 1th-10th
+         "INST_RETIRED.ANY",             // Intel Yonah, Merom
+     }},
+    {"branch-misses",
+     {
+         "BRANCH_MISPRED_NONSPEC",       // Apple A7-A15, since iOS 15, macOS 12
+         "BRANCH_MISPREDICT",            // Apple A7-A14
+         "BR_MISP_RETIRED.ALL_BRANCHES", // Intel Core 2th-10th
+         "BR_INST_RETIRED.MISPRED",      // Intel Yonah, Merom
+     }},
+};
+
+/// Return the first of `alias`'s candidate names that `db` resolves, or NULL.
+static kpep_event *get_event(kpep_db *db, const event_alias *alias) {
+  usize idx = 0;
+  // The name list ends at EVENT_NAME_MAX entries or at the first NULL slot.
+  while (idx < EVENT_NAME_MAX && alias->names[idx] != NULL) {
+    kpep_event *found = NULL;
+    if (kpep_db_event(db, alias->names[idx], &found) == 0)
+      return found;
+    idx++;
+  }
+  return NULL;
+}
+
+static kpc_config_t regs[KPC_MAX_COUNTERS] = {0}; // static: header-safe linkage
+static usize counter_map[KPC_MAX_COUNTERS] = {0};
+static u64 counters_0[KPC_MAX_COUNTERS] = {0};
+static u64 counters_1[KPC_MAX_COUNTERS] = {0};
+static const usize ev_count = sizeof(profile_events) / sizeof(profile_events[0]);
diff --git a/microbenchmarks/performancecounters/linux-perf-events.h b/microbenchmarks/performancecounters/linux-perf-events.h
new file mode 100644
index 000000000..494aeb738
--- /dev/null
+++ b/microbenchmarks/performancecounters/linux-perf-events.h
@@ -0,0 +1,101 @@
+// https://github.com/WojciechMula/toys/blob/master/000helpers/linux-perf-events.h
+#pragma once
+#ifdef __linux__
+
+#include <asm/unistd.h>        // for __NR_perf_event_open
+#include <linux/perf_event.h>  // for perf event constants
+#include <sys/ioctl.h>         // for ioctl
+#include <unistd.h>            // for syscall
+
+#include <cerrno>   // for errno
+#include <cstring>  // for memset
+#include <stdexcept>
+
+#include <iostream>
+#include <vector>
+
+/// Counts a group of perf events for the calling process (pid 0, any CPU).
+template <int TYPE = PERF_TYPE_HARDWARE>
+class LinuxEvents {
+  int fd;        // group leader, -1 while no event could be opened
+  bool working;  // cleared by report_error() when a system call fails
+  perf_event_attr attribs{};
+  size_t num_events{};
+  std::vector<uint64_t> temp_result_vec{};  // raw words read from the leader
+  std::vector<uint64_t> ids{};              // id reported for each event
+  std::vector<int> open_fds{};              // every descriptor to close later
+
+ public:
+  // Opens one event per config; the first that opens leads the group.
+  explicit LinuxEvents(std::vector<int> config_vec) : fd(-1), working(true) {
+    memset(&attribs, 0, sizeof(attribs));
+    attribs.type = TYPE;
+    attribs.size = sizeof(attribs);
+    attribs.disabled = 1;
+    attribs.exclude_kernel = 1;
+    attribs.exclude_hv = 1;
+    attribs.sample_period = 0;
+    attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
+    const int pid = 0;   // the current process
+    const int cpu = -1;  // all CPUs
+    const unsigned long flags = 0;
+
+    num_events = config_vec.size();
+    ids.resize(config_vec.size());
+    uint32_t i = 0;
+    for (auto config : config_vec) {
+      attribs.config = config;
+      // fd doubles as the group argument: -1 (no group) until a leader exists
+      const int event_fd = static_cast<int>(
+          syscall(__NR_perf_event_open, &attribs, pid, cpu, fd, flags));
+      if (event_fd == -1) {
+        report_error("perf_event_open");
+      } else {
+        ioctl(event_fd, PERF_EVENT_IOC_ID, &ids[i]);
+        if (fd == -1) fd = event_fd;  // keep the leader for start()/end()
+        open_fds.push_back(event_fd);
+      }
+      i++;
+    }
+    temp_result_vec.resize(num_events * 2 + 1);
+  }
+
+  ~LinuxEvents() {
+    for (const int open_fd : open_fds) {
+      close(open_fd);
+    }
+  }
+
+  inline void start() {
+    if (fd != -1) {
+      if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
+        report_error("ioctl(PERF_EVENT_IOC_RESET)");
+      }
+      if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
+        report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
+      }
+    }
+  }
+
+  inline void end(std::vector<unsigned long long> &results) {
+    if (fd != -1) {
+      if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
+        report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
+      }
+      if (read(fd, temp_result_vec.data(), temp_result_vec.size() * 8) == -1) {
+        report_error("read");
+      }
+    }
+    // our actual results are in slots 1,3,5, ... of this structure
+    // (ids are not cross-checked); values beyond results.size() are dropped
+    for (size_t i = 1; i < temp_result_vec.size(); i += 2) {
+      if (i / 2 < results.size()) results[i / 2] = temp_result_vec[i];
+    }
+  }
+
+  bool is_working() { return working; }
+
+ private:
+  void report_error(const std::string &) { working = false; }
+};
+#endif
diff --git a/microbenchmarks/toni_ronnko_dirent.h b/microbenchmarks/toni_ronnko_dirent.h
new file mode 100644
index 000000000..a9356644f
--- /dev/null
+++ b/microbenchmarks/toni_ronnko_dirent.h
@@ -0,0 +1,1075 @@
+/*
+ * Dirent interface for Microsoft Visual Studio
+ *
+ * Copyright (C) 1998-2019 Toni Ronkko
+ * This file is part of dirent.  Dirent may be freely distributed
+ * under the MIT license.  For all details and documentation, see
+ * https://github.com/tronkko/dirent
+ */
+#ifndef DIRENT_H
+#define DIRENT_H
+
+/* Hide warnings about unreferenced local functions */
+#if defined(__clang__)
+#pragma clang diagnostic ignored "-Wunused-function"
+#elif defined(_MSC_VER)
+#pragma warning(disable : 4505)
+#elif defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+/*
+ * Include windows.h without Windows Sockets 1.1 to prevent conflicts with
+ * Windows Sockets 2.0.
+ */
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h>
+
+#include <errno.h>
+#include <malloc.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <wchar.h>
+
+/* Indicates that d_type field is available in dirent structure */
+#define _DIRENT_HAVE_D_TYPE
+
+/* Indicates that d_namlen field is available in dirent structure */
+#define _DIRENT_HAVE_D_NAMLEN
+
+/* Entries missing from MSVC 6.0 */
+#if !defined(FILE_ATTRIBUTE_DEVICE)
+#define FILE_ATTRIBUTE_DEVICE 0x40
+#endif
+
+/* File type and permission flags for stat(), general mask */
+#if !defined(S_IFMT)
+#define S_IFMT _S_IFMT
+#endif
+
+/* Directory bit */
+#if !defined(S_IFDIR)
+#define S_IFDIR _S_IFDIR
+#endif
+
+/* Character device bit */
+#if !defined(S_IFCHR)
+#define S_IFCHR _S_IFCHR
+#endif
+
+/* Pipe bit */
+#if !defined(S_IFFIFO)
+#define S_IFFIFO _S_IFFIFO
+#endif
+
+/* Regular file bit */
+#if !defined(S_IFREG)
+#define S_IFREG _S_IFREG
+#endif
+
+/* Read permission */
+#if !defined(S_IREAD)
+#define S_IREAD _S_IREAD
+#endif
+
+/* Write permission */
+#if !defined(S_IWRITE)
+#define S_IWRITE _S_IWRITE
+#endif
+
+/* Execute permission */
+#if !defined(S_IEXEC)
+#define S_IEXEC _S_IEXEC
+#endif
+
+/* Pipe */
+#if !defined(S_IFIFO)
+#define S_IFIFO _S_IFIFO
+#endif
+
+/* Block device */
+#if !defined(S_IFBLK)
+#define S_IFBLK 0
+#endif
+
+/* Link */
+#if !defined(S_IFLNK)
+#define S_IFLNK 0
+#endif
+
+/* Socket */
+#if !defined(S_IFSOCK)
+#define S_IFSOCK 0
+#endif
+
+/* Read user permission */
+#if !defined(S_IRUSR)
+#define S_IRUSR S_IREAD
+#endif
+
+/* Write user permission */
+#if !defined(S_IWUSR)
+#define S_IWUSR S_IWRITE
+#endif
+
+/* Execute user permission */
+#if !defined(S_IXUSR)
+#define S_IXUSR 0
+#endif
+
+/* Read group permission */
+#if !defined(S_IRGRP)
+#define S_IRGRP 0
+#endif
+
+/* Write group permission */
+#if !defined(S_IWGRP)
+#define S_IWGRP 0
+#endif
+
+/* Execute group permission */
+#if !defined(S_IXGRP)
+#define S_IXGRP 0
+#endif
+
+/* Read others permission */
+#if !defined(S_IROTH)
+#define S_IROTH 0
+#endif
+
+/* Write others permission */
+#if !defined(S_IWOTH)
+#define S_IWOTH 0
+#endif
+
+/* Execute others permission */
+#if !defined(S_IXOTH)
+#define S_IXOTH 0
+#endif
+
+/* Maximum length of file name */
+#if !defined(PATH_MAX)
+#define PATH_MAX MAX_PATH
+#endif
+#if !defined(FILENAME_MAX)
+#define FILENAME_MAX MAX_PATH
+#endif
+#if !defined(NAME_MAX)
+#define NAME_MAX FILENAME_MAX
+#endif
+
+/* File type flags for d_type */
+#define DT_UNKNOWN 0
+#define DT_REG S_IFREG
+#define DT_DIR S_IFDIR
+#define DT_FIFO S_IFIFO
+#define DT_SOCK S_IFSOCK
+#define DT_CHR S_IFCHR
+#define DT_BLK S_IFBLK
+#define DT_LNK S_IFLNK
+
+/* Macros for converting between st_mode and d_type */
+#define IFTODT(mode) ((mode)&S_IFMT)
+#define DTTOIF(type) (type)
+
+/*
+ * File type macros.  Note that block devices, sockets and links cannot be
+ * distinguished on Windows and the macros S_ISBLK, S_ISSOCK and S_ISLNK are
+ * only defined for compatibility.  These macros should always return false
+ * on Windows.
+ */
+#if !defined(S_ISFIFO)
+#define S_ISFIFO(mode) (((mode)&S_IFMT) == S_IFIFO)
+#endif
+#if !defined(S_ISDIR)
+#define S_ISDIR(mode) (((mode)&S_IFMT) == S_IFDIR)
+#endif
+#if !defined(S_ISREG)
+#define S_ISREG(mode) (((mode)&S_IFMT) == S_IFREG)
+#endif
+#if !defined(S_ISLNK)
+#define S_ISLNK(mode) (((mode)&S_IFMT) == S_IFLNK)
+#endif
+#if !defined(S_ISSOCK)
+#define S_ISSOCK(mode) (((mode)&S_IFMT) == S_IFSOCK)
+#endif
+#if !defined(S_ISCHR)
+#define S_ISCHR(mode) (((mode)&S_IFMT) == S_IFCHR)
+#endif
+#if !defined(S_ISBLK)
+#define S_ISBLK(mode) (((mode)&S_IFMT) == S_IFBLK)
+#endif
+
+/* Return the exact length of the file name without zero terminator */
+#define _D_EXACT_NAMLEN(p) ((p)->d_namlen)
+
+/* Return the maximum size of a file name */
+#define _D_ALLOC_NAMLEN(p) ((PATH_MAX) + 1)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Wide-character directory entry, filled in by _wreaddir_r() */
+struct _wdirent {
+  /* Always zero */
+  long d_ino;
+
+  /* Dummy field, always reset to zero */
+  long d_off;
+
+  /* Structure size, i.e. sizeof(struct _wdirent) */
+  unsigned short d_reclen;
+
+  /* Length of name without \0 */
+  size_t d_namlen;
+
+  /* File type: DT_CHR, DT_DIR or DT_REG */
+  int d_type;
+
+  /* File name, zero-terminated; names longer than PATH_MAX are truncated */
+  wchar_t d_name[PATH_MAX + 1];
+};
+typedef struct _wdirent _wdirent;
+
+struct _WDIR {
+  /* Entry buffer handed out by _wreaddir() */
+  struct _wdirent ent;
+
+  /* Raw Win32 find data of the most recently fetched entry */
+  WIN32_FIND_DATAW data;
+
+  /* Non-zero while data holds an entry that has not been returned yet */
+  int cached;
+
+  /* Win32 search handle, INVALID_HANDLE_VALUE when no search is open */
+  HANDLE handle;
+
+  /* Search pattern: directory name followed by \* */
+  wchar_t *patt;
+};
+typedef struct _WDIR _WDIR;
+
+/* Multi-byte character directory entry, filled in by readdir_r() */
+struct dirent {
+  /* Always zero */
+  long d_ino;
+
+  /* Dummy field: zero, or -1 if the name could not be converted */
+  long d_off;
+
+  /* Structure size; zero if the name could not be converted */
+  unsigned short d_reclen;
+
+  /* Length of name without \0 */
+  size_t d_namlen;
+
+  /* File type: DT_CHR, DT_DIR, DT_REG, or DT_UNKNOWN after a failed conversion */
+  int d_type;
+
+  /* File name as a zero-terminated multi-byte string ("?" as a placeholder) */
+  char d_name[PATH_MAX + 1];
+};
+typedef struct dirent dirent;
+
+struct DIR {
+  struct dirent ent;   /* Entry buffer handed out by readdir() */
+  struct _WDIR *wdirp; /* Wide-character stream that does the actual work */
+};
+typedef struct DIR DIR;
+
+/* Dirent functions */
+static DIR *opendir(const char *dirname);
+static _WDIR *_wopendir(const wchar_t *dirname);
+
+static struct dirent *readdir(DIR *dirp);
+static struct _wdirent *_wreaddir(_WDIR *dirp);
+
+static int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result);
+static int _wreaddir_r(_WDIR *dirp, struct _wdirent *entry,
+                       struct _wdirent **result);
+
+static int closedir(DIR *dirp);
+static int _wclosedir(_WDIR *dirp);
+
+static void rewinddir(DIR *dirp);
+static void _wrewinddir(_WDIR *dirp);
+
+static int scandir(const char *dirname, struct dirent ***namelist,
+                   int (*filter)(const struct dirent *),
+                   int (*compare)(const struct dirent **,
+                                  const struct dirent **));
+
+static int alphasort(const struct dirent **a, const struct dirent **b);
+
+static int versionsort(const struct dirent **a, const struct dirent **b);
+
+/* For compatibility with Symbian */
+#define wdirent _wdirent
+#define WDIR _WDIR
+#define wopendir _wopendir
+#define wreaddir _wreaddir
+#define wclosedir _wclosedir
+#define wrewinddir _wrewinddir
+
+/* Internal utility functions */
+static WIN32_FIND_DATAW *dirent_first(_WDIR *dirp);
+static WIN32_FIND_DATAW *dirent_next(_WDIR *dirp);
+
+static int dirent_mbstowcs_s(size_t *pReturnValue, wchar_t *wcstr,
+                             size_t sizeInWords, const char *mbstr,
+                             size_t count);
+
+static int dirent_wcstombs_s(size_t *pReturnValue, char *mbstr,
+                             size_t sizeInBytes, const wchar_t *wcstr,
+                             size_t count);
+
+static void dirent_set_errno(int error);
+
+/*
+ * Open directory stream DIRNAME for read and return a pointer to the
+ * internal working area that is used to retrieve individual directory
+ * entries.  Returns NULL on failure; release the stream with _wclosedir().
+ */
+static _WDIR *_wopendir(const wchar_t *dirname) {
+  _WDIR *dirp;
+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+  DWORD n;    /* Desktop: path length in wide characters */
+  DWORD room; /* Desktop: capacity requested for the absolute path */
+#else
+  /* WinRT */
+  size_t n;
+#endif
+  wchar_t *p;
+
+  /* Must have directory name */
+  if (dirname == NULL || dirname[0] == '\0') {
+    dirent_set_errno(ENOENT);
+    return NULL;
+  }
+
+  /* Allocate new _WDIR structure */
+  dirp = (_WDIR *)malloc(sizeof(struct _WDIR));
+  if (!dirp) {
+    return NULL;
+  }
+
+  /* Reset _WDIR structure */
+  dirp->handle = INVALID_HANDLE_VALUE;
+  dirp->patt = NULL;
+  dirp->cached = 0;
+
+  /*
+   * Compute the length of full path plus zero terminator
+   *
+   * Note that on WinRT there's no way to convert relative paths
+   * into absolute paths, so just assume it is an absolute path.
+   */
+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+  /* Desktop */
+  n = GetFullPathNameW(dirname, 0, NULL, NULL);
+#else
+  /* WinRT */
+  n = wcslen(dirname);
+#endif
+
+  /* Allocate room for absolute directory name and search pattern */
+  dirp->patt = (wchar_t *)malloc(sizeof(wchar_t) * n + 16);
+  if (dirp->patt == NULL) {
+    goto exit_closedir;
+  }
+
+  /*
+   * Convert relative directory name to an absolute one.  This
+   * allows rewinddir() to function correctly even when current
+   * working directory is changed between opendir() and rewinddir().
+   *
+   * The path is resolved a second time here, so reject a result that no
+   * longer fits the capacity measured above instead of writing past patt.
+   */
+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+  room = n; /* Desktop */
+  n = GetFullPathNameW(dirname, room, dirp->patt, NULL);
+  if (n <= 0 || n >= room) {
+    goto exit_closedir;
+  }
+#else
+  /* WinRT */
+  wcsncpy_s(dirp->patt, n + 1, dirname, n);
+#endif
+
+  /* Append search pattern \* to the directory name */
+  p = dirp->patt + n;
+  switch (p[-1]) {
+  case '\\':
+  case '/':
+  case ':':
+      /* Directory ends in path separator, e.g. c:\temp\ */
+      /*NOP*/;
+    break;
+
+  default:
+    /* Directory name doesn't end in path separator */
+    *p++ = '\\';
+  }
+  *p++ = '*';
+  *p = '\0';
+
+  /* Open directory stream and retrieve the first entry */
+  if (!dirent_first(dirp)) {
+    goto exit_closedir;
+  }
+
+  /* Success */
+  return dirp;
+
+  /* Failure */
+exit_closedir:
+  _wclosedir(dirp);
+  return NULL;
+}
+
+/*
+ * Read next directory entry.
+ *
+ * The entry is stored inside DIRP, so the returned pointer is overwritten
+ * by the next _wreaddir() call on the same stream.
+ */
+static struct _wdirent *_wreaddir(_WDIR *dirp) {
+  struct _wdirent *current;
+
+  /*
+   * _wreaddir_r() stores either the stream's own entry buffer or NULL (end
+   * of stream) in CURRENT, so its status code carries no extra information.
+   */
+  (void)_wreaddir_r(dirp, &dirp->ent, &current);
+
+  /* Entry buffer embedded in DIRP, or NULL */
+  return current;
+}
+
+/*
+ * Re-entrant variant of _wreaddir(): fetch the next entry of DIRP into the
+ * caller-supplied ENTRY.
+ *
+ * Always returns zero.  RESULT receives ENTRY when an entry was read and
+ * NULL once the end of the directory stream has been reached.
+ */
+static int _wreaddir_r(_WDIR *dirp, struct _wdirent *entry,
+                       struct _wdirent **result) {
+  WIN32_FIND_DATAW *found;
+  size_t len;
+  DWORD flags;
+
+  /* Pull the next raw record from the Win32 search */
+  found = dirent_next(dirp);
+  if (!found) {
+    /* Nothing left: report end of directory */
+    *result = NULL;
+    return /*OK*/ 0;
+  }
+
+  /*
+   * Copy the wide-character name, stopping at the terminator or after
+   * PATH_MAX characters, whichever comes first, so that an over-long name
+   * is truncated and the buffer still ends with a zero.
+   */
+  for (len = 0; len < PATH_MAX && found->cFileName[len] != 0; len++) {
+    entry->d_name[len] = found->cFileName[len];
+  }
+  entry->d_name[len] = 0;
+
+  /* Name length, terminator not counted */
+  entry->d_namlen = len;
+
+  /*
+   * Map the Win32 attributes to a d_type value.  The device bit is tested
+   * first, then the directory bit; anything else is a regular file.
+   */
+  flags = found->dwFileAttributes;
+  if (flags & FILE_ATTRIBUTE_DEVICE) {
+    entry->d_type = DT_CHR;
+  } else if (flags & FILE_ATTRIBUTE_DIRECTORY) {
+    entry->d_type = DT_DIR;
+  } else {
+    entry->d_type = DT_REG;
+  }
+
+  /* Fields that are only placeholders on Windows */
+  entry->d_ino = 0;
+  entry->d_off = 0;
+  entry->d_reclen = sizeof(struct _wdirent);
+
+  /* Hand the filled entry back to the caller */
+  *result = entry;
+
+  return /*OK*/ 0;
+}
+
+/*
+ * Close a directory stream opened by _wopendir() and free everything it
+ * owns.  DIRP itself and any entry previously obtained from it through
+ * _wreaddir() are invalid afterwards.
+ *
+ * Returns 0 on success, or -1 with errno set to EBADF when DIRP is NULL.
+ */
+static int _wclosedir(_WDIR *dirp) {
+  /* A missing stream cannot be closed */
+  if (!dirp) {
+    dirent_set_errno(EBADF);
+    return /*failure*/ -1;
+  }
+
+  /* Stop the Win32 search if one is still open */
+  if (dirp->handle != INVALID_HANDLE_VALUE) {
+    FindClose(dirp->handle);
+  }
+
+  /*
+   * The search pattern is a separate allocation: free it before the stream.
+   */
+  free(dirp->patt);
+
+  /* Finally release the stream structure itself */
+  free(dirp);
+
+  return /*success*/ 0;
+}
+
+/*
+ * Rewind directory stream such that _wreaddir() returns the very first
+ * file name again.  A NULL stream is ignored.
+ */
+static void _wrewinddir(_WDIR *dirp) {
+  if (!dirp) {
+    return;
+  }
+  /* Drop the search in progress, if there is one */
+  if (dirp->handle != INVALID_HANDLE_VALUE) {
+    FindClose(dirp->handle);
+  }
+  /* Start over from the stored pattern; dirent_first() replaces the handle */
+  dirent_first(dirp);
+}
+
+/*
+ * Get first directory entry (internal).
+ *
+ * Opens a new search for dirp->patt.  Returns the buffered first entry, or
+ * NULL with errno set when the search cannot be opened.
+ */
+static WIN32_FIND_DATAW *dirent_first(_WDIR *dirp) {
+  int code;
+
+  /* Start the search; the first match, if any, lands in dirp->data */
+  dirp->handle = FindFirstFileExW(dirp->patt, FindExInfoStandard, &dirp->data,
+                                  FindExSearchNameMatch, NULL, 0);
+  if (dirp->handle != INVALID_HANDLE_VALUE) {
+    /* Mark the entry as pending so that dirent_next() returns it first */
+    dirp->cached = 1;
+    return &dirp->data;
+  }
+
+  /* Failed to open directory: no directory entry in memory */
+  dirp->cached = 0;
+
+  /* Translate the Win32 error into the closest errno value */
+  switch (GetLastError()) {
+  case ERROR_ACCESS_DENIED:
+    /* No read access to directory */
+    code = EACCES;
+    break;
+  case ERROR_DIRECTORY:
+    /* Directory name is invalid */
+    code = ENOTDIR;
+    break;
+  case ERROR_PATH_NOT_FOUND:
+  default:
+    /* Cannot find the file */
+    code = ENOENT;
+    break;
+  }
+
+  dirent_set_errno(code);
+  return NULL;
+}
+
+/*
+ * Get next directory entry (internal).
+ *
+ * Returns the entry buffered in dirp->data, or NULL when the stream is
+ * exhausted or FindNextFileW() fails.
+ */
+static WIN32_FIND_DATAW *dirent_next(_WDIR *dirp) {
+  /*
+   * An entry fetched earlier by dirent_first() is still pending: use it once.
+   */
+  if (dirp->cached != 0) {
+    dirp->cached = 0;
+    return &dirp->data;
+  }
+
+  /*
+   * No open search: the end of the stream was reached on an earlier call,
+   * or the search could not be opened.
+   */
+  if (dirp->handle == INVALID_HANDLE_VALUE) {
+    return NULL;
+  }
+
+  /* Ask Win32 for the entry that follows */
+  if (FindNextFileW(dirp->handle, &dirp->data) != FALSE) {
+    return &dirp->data;
+  }
+
+  /*
+   * The very last entry has been processed or an error occurred.  Close
+   * the search so that later calls take the end-of-stream path above.
+   */
+  FindClose(dirp->handle);
+  dirp->handle = INVALID_HANDLE_VALUE;
+  return NULL;
+}
+
+/*
+ * Open directory stream using plain old C-string.
+ *
+ * Returns NULL when DIRNAME is NULL or empty (errno is set to ENOENT), when
+ * memory runs out, when the name cannot be converted to wide characters,
+ * or when _wopendir() fails.
+ */
+static DIR *opendir(const char *dirname) {
+  struct DIR *stream;
+  wchar_t wide_name[PATH_MAX + 1];
+  size_t converted;
+  int failed;
+
+  /* Must have directory name */
+  if (dirname == NULL || dirname[0] == '\0') {
+    dirent_set_errno(ENOENT);
+    return NULL;
+  }
+
+  /* Allocate memory for DIR structure */
+  stream = (DIR *)malloc(sizeof(struct DIR));
+  if (!stream) {
+    return NULL;
+  }
+
+  /*
+   * Convert directory name to wide-character string.  This fails if the
+   * string contains invalid multi-byte sequences or if the result does not
+   * fit into wide_name.
+   */
+  failed = dirent_mbstowcs_s(&converted, wide_name, PATH_MAX + 1, dirname,
+                             PATH_MAX + 1);
+  if (failed) {
+    /* Nothing was opened yet, only the wrapper needs to go */
+    free(stream);
+    return NULL;
+  }
+
+  /* Open directory stream using wide-character name */
+  stream->wdirp = _wopendir(wide_name);
+  if (!stream->wdirp) {
+    /* _wopendir() cleaned up after itself; discard the wrapper */
+    free(stream);
+    return NULL;
+  }
+
+  /* Success */
+  return stream;
+}
+
+/*
+ * Read next directory entry.  Returns NULL at the end of the stream.
+ */
+static struct dirent *readdir(DIR *dirp) {
+  struct dirent *current;
+
+  /*
+   * readdir_r() stores either the stream's own entry buffer or NULL (end
+   * of stream) in CURRENT, so its status code carries no extra information.
+   */
+  (void)readdir_r(dirp, &dirp->ent, &current);
+
+  /* Entry buffer embedded in DIRP, or NULL */
+  return current;
+}
+
+/*
+ * Read next directory entry into caller-allocated buffer.
+ *
+ * Returns zero on success.  If the end of directory stream is reached, then
+ * sets result to NULL and returns zero.
+ */
+static int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result) {
+  WIN32_FIND_DATAW *found;
+  size_t bytes;
+  int failed;
+  DWORD flags;
+
+  /* Pull the next raw record from the wide-character stream */
+  found = dirent_next(dirp->wdirp);
+  if (!found) {
+    /* No more directory entries */
+    *result = NULL;
+    return /*OK*/ 0;
+  }
+
+  /* First choice: the long file name converted to a multi-byte string */
+  failed = dirent_wcstombs_s(&bytes, entry->d_name, PATH_MAX + 1,
+                             found->cFileName, PATH_MAX + 1);
+
+  /*
+   * Second choice: if the long name has no multi-byte representation, fall
+   * back to the old 8+3 name.  Traditional Unix code can then still reach
+   * files with Unicode names, although the names may look unfamiliar to
+   * the user.
+   *
+   * Beware that a short name exists only if the file system provides
+   * one.  At least VirtualBox shared folders fail to do this.
+   */
+  if (failed && found->cAlternateFileName[0] != '\0') {
+    failed = dirent_wcstombs_s(&bytes, entry->d_name, PATH_MAX + 1,
+                               found->cAlternateFileName, PATH_MAX + 1);
+  }
+
+  if (failed) {
+
+    /*
+     * The name could not be converted.  Returning NULL would end the
+     * caller's iteration early, so report a placeholder entry named "?"
+     * instead.
+     */
+    entry->d_name[0] = '?';
+    entry->d_name[1] = '\0';
+    entry->d_namlen = 1;
+    entry->d_type = DT_UNKNOWN;
+    entry->d_ino = 0;
+    entry->d_off = -1;
+    entry->d_reclen = 0;
+
+  } else {
+
+    /*
+     * The converter's count includes the terminator; d_namlen must not.
+     */
+    entry->d_namlen = bytes - 1;
+
+    /*
+     * Device bit first, then directory bit, otherwise a regular file.
+     */
+    flags = found->dwFileAttributes;
+    if (flags & FILE_ATTRIBUTE_DEVICE) {
+      entry->d_type = DT_CHR;
+    } else if (flags & FILE_ATTRIBUTE_DIRECTORY) {
+      entry->d_type = DT_DIR;
+    } else {
+      entry->d_type = DT_REG;
+    }
+
+    /* Fields that are only placeholders on Windows */
+    entry->d_ino = 0;
+    entry->d_off = 0;
+    entry->d_reclen = sizeof(struct dirent);
+  }
+
+  /* Return pointer to directory entry */
+  *result = entry;
+
+  return /*OK*/ 0;
+}
+
+/*
+ * Close directory stream.
+ *
+ * Returns the status of _wclosedir() for the wrapped stream, or -1 with
+ * errno set to EBADF when DIRP is NULL.
+ */
+static int closedir(DIR *dirp) {
+  int status;
+  if (!dirp) {
+    /* Invalid directory stream */
+    dirent_set_errno(EBADF);
+    return /*failure*/ -1;
+  }
+
+  /* Close wide-character directory stream */
+  status = _wclosedir(dirp->wdirp);
+  dirp->wdirp = NULL;
+
+  /* Release multi-byte character version */
+  free(dirp);
+  return status;
+}
+
+/*
+ * Rewind directory stream to beginning.  A NULL stream is ignored.
+ */
+static void rewinddir(DIR *dirp) {
+  /* _wrewinddir() ignores NULL, so a missing stream is forwarded as NULL */
+  _wrewinddir(dirp ? dirp->wdirp : NULL);
+}
+
+/*
+ * Scan directory for entries.  Entries accepted by FILTER (all of them if
+ * FILTER is NULL) are stored in NAMELIST, sorted with COMPARE unless COMPARE
+ * is NULL.  Returns the number of entries stored, or -1 on error.
+ */
+static int scandir(const char *dirname, struct dirent ***namelist,
+                   int (*filter)(const struct dirent *),
+                   int (*compare)(const struct dirent **,
+                                  const struct dirent **)) {
+  struct dirent **files = NULL;
+  size_t size = 0;
+  size_t allocated = 0;
+  const size_t init_size = 1;
+  DIR *dir = NULL;
+  struct dirent *entry;
+  struct dirent *tmp = NULL;
+  size_t i;
+  int result = 0;
+
+  /* Open directory stream */
+  dir = opendir(dirname);
+  if (dir) {
+
+    /* Read directory entries to memory */
+    while (1) {
+
+      /* Enlarge pointer table to make room for another pointer */
+      if (size >= allocated) {
+        void *p;
+        size_t num_entries;
+
+        /* Compute number of entries in the enlarged pointer table */
+        if (size < init_size) {
+          /* Allocate initial pointer table */
+          num_entries = init_size;
+        } else {
+          /* Double the size */
+          num_entries = size * 2;
+        }
+
+        /* Allocate first pointer table or enlarge existing table */
+        p = realloc(files, sizeof(void *) * num_entries);
+        if (p != NULL) {
+          /* Got the memory */
+          files = (dirent **)p;
+          allocated = num_entries;
+        } else {
+          /* Out of memory */
+          result = -1;
+          break;
+        }
+      }
+
+      /* Allocate room for temporary directory entry */
+      if (tmp == NULL) {
+        tmp = (struct dirent *)malloc(sizeof(struct dirent));
+        if (tmp == NULL) {
+          /* Cannot allocate temporary directory entry */
+          result = -1;
+          break;
+        }
+      }
+
+      /* Read directory entry to temporary area */
+      if (readdir_r(dir, tmp, &entry) == /*OK*/ 0) {
+
+        /* Did we get an entry? */
+        if (entry != NULL) {
+          int pass;
+
+          /* Determine whether to include the entry in result */
+          if (filter) {
+            /* Let the filter function decide */
+            pass = filter(tmp);
+          } else {
+            /* No filter function, include everything */
+            pass = 1;
+          }
+
+          if (pass) {
+            /* Store the temporary entry to pointer table */
+            files[size++] = tmp;
+            tmp = NULL;
+
+            /* Keep up with the number of files */
+            result++;
+          }
+
+        } else {
+          /* End of stream: sort if a comparator was given, then exit */
+          if (compare) {
+            qsort(files, size, sizeof(void *),
+                  (int (*)(const void *, const void *))compare);
+          }
+          break;
+        }
+
+      } else {
+        /* Error reading directory entry */
+        result = /*Error*/ -1;
+        break;
+      }
+    }
+
+  } else {
+    /* Cannot open directory */
+    result = /*Error*/ -1;
+  }
+
+  /* Release temporary directory entry */
+  free(tmp);
+
+  /* Release allocated memory on error */
+  if (result < 0) {
+    for (i = 0; i < size; i++) {
+      free(files[i]);
+    }
+    free(files);
+    files = NULL;
+  }
+
+  /* Close directory stream */
+  if (dir) {
+    closedir(dir);
+  }
+
+  /* Pass pointer table to caller */
+  if (namelist) {
+    *namelist = files;
+  }
+  return result;
+}
+
+/* Order two entries by name according to the current locale (strcoll) */
+static int alphasort(const struct dirent **a, const struct dirent **b) {
+  return strcoll(a[0]->d_name, b[0]->d_name);
+}
+
+/* Sort versions; for now this is plain alphabetical order, see FIXME below */
+static int versionsort(const struct dirent **a, const struct dirent **b) {
+  /* FIXME: implement strverscmp and use that */
+  return alphasort(a, b);
+}
+
+/* Convert multi-byte string to wide character string; returns zero on success */
+static int dirent_mbstowcs_s(size_t *pReturnValue, wchar_t *wcstr,
+                             size_t sizeInWords, const char *mbstr,
+                             size_t count) {
+  int error;
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+
+  /* Microsoft Visual Studio 2005 or later */
+  error = mbstowcs_s(pReturnValue, wcstr, sizeInWords, mbstr, count);
+
+#else
+
+  /* Older Visual Studio or non-Microsoft compiler */
+  size_t n;
+
+  /* Convert to wide-character string (or count characters) */
+  n = mbstowcs(wcstr, mbstr, sizeInWords);
+  if (!wcstr || n < count) {
+    /* No buffer given (counting only), or fewer than COUNT characters converted */
+    /* Zero-terminate output buffer */
+    if (wcstr && sizeInWords) {
+      if (n >= sizeInWords) {
+        n = sizeInWords - 1;
+      }
+      wcstr[n] = 0;
+    }
+
+    /* Length of resulting wide-character string WITH zero terminator */
+    if (pReturnValue) {
+      *pReturnValue = n + 1;
+    }
+
+    /* Success */
+    error = 0;
+
+  } else {
+
+    /* Could not convert string */
+    error = 1;
+  }
+
+#endif
+  return error;
+}
+
+/* Convert wide-character string to multi-byte string; returns zero on success */
+static int dirent_wcstombs_s(size_t *pReturnValue, char *mbstr,
+                             size_t sizeInBytes, /* max size of mbstr */
+                             const wchar_t *wcstr, size_t count) {
+  int error;
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+
+  /* Microsoft Visual Studio 2005 or later */
+  error = wcstombs_s(pReturnValue, mbstr, sizeInBytes, wcstr, count);
+
+#else
+
+  /* Older Visual Studio or non-Microsoft compiler */
+  size_t n;
+
+  /* Convert to multi-byte string (or count the number of bytes needed) */
+  n = wcstombs(mbstr, wcstr, sizeInBytes);
+  if (!mbstr || n < count) {
+    /* No buffer given (counting only), or fewer than COUNT bytes produced */
+    /* Zero-terminate output buffer */
+    if (mbstr && sizeInBytes) {
+      if (n >= sizeInBytes) {
+        n = sizeInBytes - 1;
+      }
+      mbstr[n] = '\0';
+    }
+
+    /* Length of resulting multi-byte string WITH zero terminator */
+    if (pReturnValue) {
+      *pReturnValue = n + 1;
+    }
+
+    /* Success */
+    error = 0;
+
+  } else {
+
+    /* Cannot convert string */
+    error = 1;
+  }
+
+#endif
+  return error;
+}
+
+/* Set errno variable to ERROR, through _set_errno() where the compiler has it */
+static void dirent_set_errno(int error) {
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+
+  /* Microsoft Visual Studio 2005 and later */
+  _set_errno(error);
+
+#else
+
+  /* Non-Microsoft compiler or older Microsoft compiler */
+  errno = error;
+
+#endif
+}
+
+#ifdef __cplusplus
+}
+#endif
+#endif /*DIRENT_H*/
\ No newline at end of file
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 7b3e0d02a..8ad9d7835 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -15,6 +15,7 @@ endif()
 
 MESSAGE( STATUS "ROARING_LIB_TYPE: " ${ROARING_LIB_TYPE})
 set(ROARING_SRC
+    isadetection.c
     array_util.c
     bitset_util.c
     bitset.c
@@ -39,34 +40,38 @@ if(ROARING_BUILD_C_AS_CPP)  # more checks and tools, e.g. <type_traits> analysis
   SET_SOURCE_FILES_PROPERTIES(${ROARING_SRC} PROPERTIES LANGUAGE CXX)
 endif()
 
-add_library(${ROARING_LIB_NAME} ${ROARING_LIB_TYPE} ${ROARING_SRC})
-target_include_directories(${ROARING_LIB_NAME}
+add_library(roaring ${ROARING_LIB_TYPE} ${ROARING_SRC})
+if(ROARING_DISABLE_AVX512)
+  target_compile_definitions(roaring PUBLIC CROARING_COMPILER_SUPPORTS_AVX512=0)
+endif(ROARING_DISABLE_AVX512)
+
+target_include_directories(roaring
   PUBLIC
    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
 )
-target_link_libraries(${ROARING_LIB_NAME} PUBLIC roaring-headers)
-target_link_libraries(${ROARING_LIB_NAME} PUBLIC roaring-headers-cpp)
+target_link_libraries(roaring PUBLIC roaring-headers)
+target_link_libraries(roaring PUBLIC roaring-headers-cpp)
 #
-#install(TARGETS ${ROARING_LIB_NAME} DESTINATION lib)
+#install(TARGETS roaring DESTINATION lib)
 #
-install(TARGETS ${ROARING_LIB_NAME} 
-   EXPORT ${ROARING_LIB_NAME}-config
+install(TARGETS roaring 
+   EXPORT roaring-config
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
    INCLUDES DESTINATION ${CMAKE_INSTALL_INCDIR}
 )
-install(EXPORT ${ROARING_LIB_NAME}-config
-   FILE ${ROARING_LIB_NAME}-config.cmake
-   NAMESPACE ${ROARING_LIB_NAME}::
-   DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${ROARING_LIB_NAME}
+install(EXPORT roaring-config
+   FILE roaring-config.cmake
+   NAMESPACE roaring::
+   DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/roaring
  )
 
 if(NOT MSVC)
 ## We output the library at the root of the current directory where cmake is invoked
 ## This is handy but Visual Studio will happily ignore us
-set_target_properties(${ROARING_LIB_NAME} PROPERTIES
+set_target_properties(roaring PROPERTIES
   LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}
   VERSION ${ROARING_LIB_VERSION}
   SOVERSION ${ROARING_LIB_SOVERSION})
@@ -78,6 +83,6 @@ if(MSVC AND (ROARING_LIB_TYPE STREQUAL "SHARED"))
     MESSAGE( STATUS "To build  a Windows DLL using Visual Studio, you may need cmake 3.4 or better." )
   endif()
   MESSAGE( STATUS "Building a Windows DLL using Visual Studio, exporting all symbols automatically." )
- set_target_properties(${ROARING_LIB_NAME}
+ set_target_properties(roaring
     PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS 1)
 endif()
diff --git a/src/array_util.c b/src/array_util.c
index cadb76821..f19b9ac5a 100644
--- a/src/array_util.c
+++ b/src/array_util.c
@@ -9,7 +9,14 @@
 #include <roaring/portability.h>
 #include <roaring/utilasm.h>
 
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+
 #ifdef __cplusplus
+using namespace ::roaring::internal;
 extern "C" { namespace roaring { namespace internal {
 #endif
 
@@ -1953,7 +1960,7 @@ size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
 size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
                     size_t size_2, uint16_t *buffer) {
 #ifdef CROARING_IS_X64
-    if( croaring_avx2() ) {
+    if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {
         // compute union with smallest array first
       if (size_1 < size_2) {
         return union_vector16(set_1, (uint32_t)size_1,
@@ -2092,12 +2099,13 @@ bool memequals(const void *s1, const void *s2, size_t n) {
         return true;
     }
 #ifdef CROARING_IS_X64
+    int support = croaring_hardware_support();
 #if CROARING_COMPILER_SUPPORTS_AVX512
-    if( croaring_avx512() ) {
+    if( support & ROARING_SUPPORTS_AVX512 ) {
       return _avx512_memequals(s1, s2, n);
     } else
 #endif // CROARING_COMPILER_SUPPORTS_AVX512
-    if( croaring_avx2() ) {
+    if( support & ROARING_SUPPORTS_AVX2 ) {
       return _avx2_memequals(s1, s2, n);
     } else {
       return memcmp(s1, s2, n) == 0;
diff --git a/src/bitset_util.c b/src/bitset_util.c
index 331c65620..427d95901 100644
--- a/src/bitset_util.c
+++ b/src/bitset_util.c
@@ -6,7 +6,14 @@
 
 #include <roaring/bitset_util.h>
 
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+
 #ifdef __cplusplus
+using namespace ::roaring::internal;
 extern "C" { namespace roaring { namespace api {
 #endif
 
@@ -1006,7 +1013,7 @@ static inline void _scalar_bitset_set_list(uint64_t *words, const uint16_t *list
 
 uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
                            uint64_t length) {
-    if( croaring_avx2() ) {
+    if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {
         return _asm_bitset_clear_list(words, card, list, length);
     } else {
         return _scalar_bitset_clear_list(words, card, list, length);
@@ -1015,7 +1022,7 @@ uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list,
 
 uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card,
                                   const uint16_t *list, uint64_t length) {
-    if( croaring_avx2() ) {
+    if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {
         return _asm_bitset_set_list_withcard(words, card, list, length);
     } else {
         return _scalar_bitset_set_list_withcard(words, card, list, length);
@@ -1023,7 +1030,7 @@ uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card,
 }
 
 void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) {
-    if( croaring_avx2() ) {
+    if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {
         _asm_bitset_set_list(words, list, length);
     } else {
         _scalar_bitset_set_list(words, list, length);
diff --git a/src/containers/array.c b/src/containers/array.c
index 8e3c053f2..199c20ceb 100644
--- a/src/containers/array.c
+++ b/src/containers/array.c
@@ -217,7 +217,7 @@ void array_container_andnot(const array_container_t *array_1,
     if (out->capacity < array_1->cardinality)
         array_container_grow(out, array_1->cardinality, false);
 #ifdef CROARING_IS_X64
-    if(( croaring_avx2() ) && (out != array_1) && (out != array_2)) {
+    if(( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) && (out != array_1) && (out != array_2)) {
       out->cardinality =
           difference_vector16(array_1->array, array_1->cardinality,
                             array_2->array, array_2->cardinality, out->array);
@@ -248,7 +248,7 @@ void array_container_xor(const array_container_t *array_1,
     }
 
 #ifdef CROARING_IS_X64
-    if( croaring_avx2() ) {
+    if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {
       out->cardinality =
         xor_vector16(array_1->array, array_1->cardinality, array_2->array,
                      array_2->cardinality, out->array);
@@ -297,7 +297,7 @@ void array_container_intersection(const array_container_t *array1,
             array2->array, card_2, array1->array, card_1, out->array);
     } else {
 #ifdef CROARING_IS_X64
-       if( croaring_avx2() ) {
+       if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {
         out->cardinality = intersect_vector16(
             array1->array, card_1, array2->array, card_2, out->array);
        } else {
@@ -325,7 +325,7 @@ int array_container_intersection_cardinality(const array_container_t *array1,
                                                    array1->array, card_1);
     } else {
 #ifdef CROARING_IS_X64
-    if( croaring_avx2() ) {
+    if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {
         return intersect_vector16_cardinality(array1->array, card_1,
                                               array2->array, card_2);
     } else {
@@ -371,7 +371,7 @@ void array_container_intersection_inplace(array_container_t *src_1,
             src_2->array, card_2, src_1->array, card_1, src_1->array);
     } else {
 #ifdef CROARING_IS_X64
-        if (croaring_avx2()) {
+        if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
             src_1->cardinality = intersect_vector16_inplace(
                 src_1->array, card_1, src_2->array, card_2);
         } else {
diff --git a/src/containers/bitset.c b/src/containers/bitset.c
index a20741117..86375ea0d 100644
--- a/src/containers/bitset.c
+++ b/src/containers/bitset.c
@@ -16,6 +16,12 @@
 #include <roaring/memory.h>
 #include <roaring/utilasm.h>
 
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+
 #ifdef __cplusplus
 extern "C" { namespace roaring { namespace internal {
 #endif
@@ -56,7 +62,8 @@ bitset_container_t *bitset_container_create(void) {
 
     size_t align_size = 32;
 #ifdef CROARING_IS_X64
-    if ( croaring_avx512() ) {
+    int support = croaring_hardware_support();
+    if ( support & ROARING_SUPPORTS_AVX512 ) {
 	    // sizeof(__m512i) == 64
 	    align_size = 64;
     }
@@ -131,7 +138,7 @@ bitset_container_t *bitset_container_clone(const bitset_container_t *src) {
 
     size_t align_size = 32;
 #ifdef CROARING_IS_X64
-    if ( croaring_avx512() ) {
+    if ( croaring_hardware_support() & ROARING_SUPPORTS_AVX512 ) {
 	    // sizeof(__m512i) == 64
 	    align_size = 64;
     }
@@ -257,14 +264,15 @@ static inline int _scalar_bitset_container_compute_cardinality(const bitset_cont
 }
 /* Get the number of bits set (force computation) */
 int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
+    int support = croaring_hardware_support();
 #if CROARING_COMPILER_SUPPORTS_AVX512
-    if( croaring_avx512() ) {
+    if( support & ROARING_SUPPORTS_AVX512 ) {
       return (int) avx512_vpopcount(
         (const __m512i *)bitset->words,
         BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX512_REG));
     } else
 #endif // CROARING_COMPILER_SUPPORTS_AVX512
-    if( croaring_avx2() ) {
+    if( support & ROARING_SUPPORTS_AVX2 ) {
       return (int) avx2_harley_seal_popcount256(
         (const __m256i *)bitset->words,
         BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));
@@ -321,10 +329,7 @@ int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
 #ifndef WORDS_IN_AVX512_REG
 #define WORDS_IN_AVX512_REG sizeof(__m512i) / sizeof(uint64_t)
 #endif // WORDS_IN_AVX512_REG
-/*#define LOOP_SIZE                    \
-    BITSET_CONTAINER_SIZE_IN_WORDS / \
-        ((WORDS_IN_AVX512_REG)*BITSET_CONTAINER_FN_REPEAT)
-*/
+
 /* Computes a binary operation (eg union) on bitset1 and bitset2 and write the
    result to bitsetout */
 // clang-format off
@@ -698,15 +703,15 @@ SCALAR_BITSET_CONTAINER_FN(xor,    ^,  _mm256_xor_si256,    veorq_u64)
 SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
 
 #if CROARING_COMPILER_SUPPORTS_AVX512
-
 #define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic)   \
   int bitset_container_##opname(const bitset_container_t *src_1,               \
                                 const bitset_container_t *src_2,               \
                                 bitset_container_t *dst) {                     \
-    if ( croaring_avx512() ) {                                                 \
+    int support = croaring_hardware_support();                                 \
+    if ( support & ROARING_SUPPORTS_AVX512 ) {                                 \
       return _avx512_bitset_container_##opname(src_1, src_2, dst);             \
     }                                                                          \
-    else if ( croaring_avx2() ) {                                              \
+    else if ( support & ROARING_SUPPORTS_AVX2 ) {                              \
       return _avx2_bitset_container_##opname(src_1, src_2, dst);               \
     } else {                                                                   \
       return _scalar_bitset_container_##opname(src_1, src_2, dst);             \
@@ -715,10 +720,11 @@ SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
   int bitset_container_##opname##_nocard(const bitset_container_t *src_1,      \
                                          const bitset_container_t *src_2,      \
                                          bitset_container_t *dst) {            \
-    if ( croaring_avx512() ) {                                                 \
+    int support = croaring_hardware_support();                                 \
+    if ( support & ROARING_SUPPORTS_AVX512 ) {                                 \
       return _avx512_bitset_container_##opname##_nocard(src_1, src_2, dst);    \
     }                                                                          \
-    else if ( croaring_avx2() ) {                                              \
+    else if ( support & ROARING_SUPPORTS_AVX2 ) {                              \
       return _avx2_bitset_container_##opname##_nocard(src_1, src_2, dst);      \
     } else {                                                                   \
       return _scalar_bitset_container_##opname##_nocard(src_1, src_2, dst);    \
@@ -726,11 +732,11 @@ SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
   }                                                                            \
   int bitset_container_##opname##_justcard(const bitset_container_t *src_1,    \
                                            const bitset_container_t *src_2) {  \
-    if ( croaring_avx512() ) {                                                 \
+     int support = croaring_hardware_support();                                \
+    if ( support & ROARING_SUPPORTS_AVX512 ) {                                 \
       return _avx512_bitset_container_##opname##_justcard(src_1, src_2);       \
     }                                                                          \
-    else if ((croaring_detect_supported_architectures() & CROARING_AVX2) ==    \
-        CROARING_AVX2) {                                                       \
+    else if ( support & ROARING_SUPPORTS_AVX2 ) {                              \
       return _avx2_bitset_container_##opname##_justcard(src_1, src_2);         \
     } else {                                                                   \
       return _scalar_bitset_container_##opname##_justcard(src_1, src_2);       \
@@ -744,7 +750,7 @@ SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
   int bitset_container_##opname(const bitset_container_t *src_1,               \
                                 const bitset_container_t *src_2,               \
                                 bitset_container_t *dst) {                     \
-    if ( croaring_avx2() ) {                                                   \
+    if ( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {               \
       return _avx2_bitset_container_##opname(src_1, src_2, dst);               \
     } else {                                                                   \
       return _scalar_bitset_container_##opname(src_1, src_2, dst);             \
@@ -753,7 +759,7 @@ SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
   int bitset_container_##opname##_nocard(const bitset_container_t *src_1,      \
                                          const bitset_container_t *src_2,      \
                                          bitset_container_t *dst) {            \
-    if ( croaring_avx2() ) {                                                   \
+    if ( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {               \
       return _avx2_bitset_container_##opname##_nocard(src_1, src_2, dst);      \
     } else {                                                                   \
       return _scalar_bitset_container_##opname##_nocard(src_1, src_2, dst);    \
@@ -761,7 +767,7 @@ SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
   }                                                                            \
   int bitset_container_##opname##_justcard(const bitset_container_t *src_1,    \
                                            const bitset_container_t *src_2) {  \
-    if ( croaring_avx2() ) {                                                   \
+    if ( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {               \
       return _avx2_bitset_container_##opname##_justcard(src_1, src_2);         \
     } else {                                                                   \
       return _scalar_bitset_container_##opname##_justcard(src_1, src_2);       \
@@ -893,7 +899,7 @@ int bitset_container_##opname##_nocard(const bitset_container_t *src_1,   \
 }                                                                         \
 int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
                               const bitset_container_t *src_2) {          \
-    const uint64_t * __restrict__ words_1 = src_1->words;                 \
+    const uint64_t * __restrict__ words_1 = src_1->words;                 \
     const uint64_t * __restrict__ words_2 = src_2->words;                 \
     int32_t sum = 0;                                                      \
     for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) {      \
@@ -927,13 +933,14 @@ int bitset_container_to_uint32_array(
     uint32_t base
 ){
 #ifdef CROARING_IS_X64
+   int support = croaring_hardware_support();
 #if CROARING_COMPILER_SUPPORTS_AVX512
-   if(( croaring_avx512() ) &&  (bc->cardinality >= 8192))  // heuristic
+   if(( support & ROARING_SUPPORTS_AVX512 ) &&  (bc->cardinality >= 8192))  // heuristic
 		return (int) bitset_extract_setbits_avx512(bc->words,
                 BITSET_CONTAINER_SIZE_IN_WORDS, out, bc->cardinality, base);
    else
 #endif
-   if(( croaring_avx2() ) &&  (bc->cardinality >= 8192))  // heuristic
+   if(( support & ROARING_SUPPORTS_AVX2 ) &&  (bc->cardinality >= 8192))  // heuristic
 		return (int) bitset_extract_setbits_avx2(bc->words,
                 BITSET_CONTAINER_SIZE_IN_WORDS, out, bc->cardinality, base);
 	else
@@ -1103,13 +1110,14 @@ bool bitset_container_equals(const bitset_container_t *container1, const bitset_
     }
   }
 #ifdef CROARING_IS_X64
+  int support = croaring_hardware_support();
 #if CROARING_COMPILER_SUPPORTS_AVX512
-  if( croaring_avx512() ) {
+  if( support & ROARING_SUPPORTS_AVX512 ) {
     return _avx512_bitset_container_equals(container1, container2);
   }
   else
 #endif
-  if( croaring_avx2() ) {
+  if( support & ROARING_SUPPORTS_AVX2 ) {
     return _avx2_bitset_container_equals(container1, container2);
   }
 #endif
diff --git a/src/containers/convert.c b/src/containers/convert.c
index ec3b94ceb..743f62184 100644
--- a/src/containers/convert.c
+++ b/src/containers/convert.c
@@ -5,6 +5,12 @@
 #include <roaring/containers/convert.h>
 #include <roaring/containers/perfparameters.h>
 
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+
 #ifdef __cplusplus
 extern "C" { namespace roaring { namespace internal {
 #endif
@@ -50,7 +56,7 @@ array_container_t *array_container_from_bitset(const bitset_container_t *bits) {
     result->cardinality = bits->cardinality;
 #if CROARING_IS_X64
 #if CROARING_COMPILER_SUPPORTS_AVX512
-    if( croaring_avx512() ) {
+    if( croaring_hardware_support() & ROARING_SUPPORTS_AVX512 ) {
         bitset_extract_setbits_avx512_uint16(bits->words, BITSET_CONTAINER_SIZE_IN_WORDS,
                                   result->array, bits->cardinality , 0);
     } else
diff --git a/src/containers/run.c b/src/containers/run.c
index a32e476f0..ed3c6c4f8 100644
--- a/src/containers/run.c
+++ b/src/containers/run.c
@@ -5,6 +5,12 @@
 #include <roaring/portability.h>
 #include <roaring/memory.h>
 
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+
 #ifdef __cplusplus
 extern "C" { namespace roaring { namespace internal {
 #endif
@@ -924,12 +930,12 @@ static inline int _scalar_run_container_cardinality(const run_container_t *run)
 
 int run_container_cardinality(const run_container_t *run) {
 #if CROARING_COMPILER_SUPPORTS_AVX512
-  if( croaring_avx512() ) {
+  if( croaring_hardware_support() & ROARING_SUPPORTS_AVX512 ) {
     return _avx512_run_container_cardinality(run);
   }
   else
 #endif
-  if( croaring_avx2() ) {
+  if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {
     return _avx2_run_container_cardinality(run);
   } else {
     return _scalar_run_container_cardinality(run);
diff --git a/src/isadetection.c b/src/isadetection.c
new file mode 100644
index 000000000..272577aa0
--- /dev/null
+++ b/src/isadetection.c
@@ -0,0 +1,268 @@
+
+/* From
+https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
+Highly modified.
+
+Copyright (c) 2016-     Facebook, Inc            (Adam Paszke)
+Copyright (c) 2014-     Facebook, Inc            (Soumith Chintala)
+Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
+Copyright (c) 2012-2014 Deepmind Technologies    (Koray Kavukcuoglu)
+Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
+Copyright (c) 2011-2013 NYU                      (Clement Farabet)
+Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
+Iain Melvin, Jason Weston) Copyright (c) 2006      Idiap Research Institute
+(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
+Samy Bengio, Johnny Mariethoz)
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
+America and IDIAP Research Institute nor the names of its contributors may be
+   used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+
+// We need portability.h to be included first, see
+// https://github.com/RoaringBitmap/CRoaring/issues/394
+#include <roaring/portability.h>
+#if CROARING_REGULAR_VISUAL_STUDIO
+#include <intrin.h>
+#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
+#include <cpuid.h>
+#endif // CROARING_REGULAR_VISUAL_STUDIO
+#include <roaring/isadetection.h>
+
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+enum croaring_instruction_set {  // one bit per instruction set; flags are OR-ed into a detection mask
+  CROARING_DEFAULT = 0x0,  // empty mask: no optional instruction set
+  CROARING_NEON = 0x1,
+  CROARING_AVX2 = 0x4,
+  CROARING_SSE42 = 0x8,
+  CROARING_PCLMULQDQ = 0x10,
+  CROARING_BMI1 = 0x20,
+  CROARING_BMI2 = 0x40,
+  CROARING_ALTIVEC = 0x80,
+  CROARING_AVX512F = 0x100,
+  CROARING_AVX512DQ = 0x200,
+  CROARING_AVX512BW = 0x400,
+  CROARING_AVX512VBMI2 = 0x800,
+  CROARING_AVX512BITALG = 0x1000,
+  CROARING_AVX512VPOPCNTDQ = 0x2000,
+  CROARING_UNINITIALIZED = 0x8000  // sentinel held by the caches before detection has run
+};
+// AVX-512 subsets that must all be detected before AVX-512 support is reported.
+#if CROARING_COMPILER_SUPPORTS_AVX512
+static unsigned int CROARING_AVX512_REQUIRED = (CROARING_AVX512F | CROARING_AVX512DQ | CROARING_AVX512BW | CROARING_AVX512VBMI2 | CROARING_AVX512BITALG | CROARING_AVX512VPOPCNTDQ);
+#endif
+
+#if defined(__x86_64__) || defined(_M_AMD64) // x64
+// Runs the CPUID instruction: *eax selects the leaf on entry, and the four
+// pointers receive the EAX/EBX/ECX/EDX results on return.
+static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
+                         uint32_t *edx) {
+#if CROARING_REGULAR_VISUAL_STUDIO
+  int cpu_info[4];
+  __cpuid(cpu_info, *eax);  // NOTE(review): *ecx is not forwarded here; confirm the intrinsic queries sub-leaf 0
+  *eax = cpu_info[0];
+  *ebx = cpu_info[1];
+  *ecx = cpu_info[2];
+  *edx = cpu_info[3];
+#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
+  uint32_t level = *eax;
+  __get_cpuid(level, eax, ebx, ecx, edx);  // NOTE(review): return value ignored and *ecx not passed as input; confirm this is safe for leaf 7
+#else
+  uint32_t a = *eax, b, c = *ecx, d;
+  __asm__("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));  // EAX/ECX are in-out ("+"), EBX/EDX are outputs only ("=")
+  *eax = a;
+  *ebx = b;
+  *ecx = c;
+  *edx = d;
+#endif
+}
+
+/**
+ * Queries CPUID (EAX=0x7 first, then EAX=0x1) and returns the detected features
+ * as an OR of croaring_instruction_set flags. This is relatively expensive, so
+ * the croaring_detect_supported_architectures() wrappers cache its result.
+ */
+static inline uint32_t dynamic_croaring_detect_supported_architectures() {
+  uint32_t eax, ebx, ecx, edx;
+  uint32_t host_isa = 0x0;
+  // Can be found on Intel ISA Reference for CPUID
+  static uint32_t cpuid_avx2_bit = 1 << 5;      ///< @private Bit 5 of EBX for EAX=0x7
+  static uint32_t cpuid_bmi1_bit = 1 << 3;      ///< @private bit 3 of EBX for EAX=0x7
+  static uint32_t cpuid_bmi2_bit = 1 << 8;      ///< @private bit 8 of EBX for EAX=0x7
+  static uint32_t cpuid_avx512f_bit = 1 << 16;  ///< @private bit 16 of EBX for EAX=0x7
+  static uint32_t cpuid_avx512dq_bit = 1 << 17; ///< @private bit 17 of EBX for EAX=0x7
+  static uint32_t cpuid_avx512bw_bit = 1 << 30; ///< @private bit 30 of EBX for EAX=0x7
+  static uint32_t cpuid_avx512vbmi2_bit = 1 << 6; ///< @private bit 6 of ECX for EAX=0x7
+  static uint32_t cpuid_avx512bitalg_bit = 1 << 12; ///< @private bit 12 of ECX for EAX=0x7
+  static uint32_t cpuid_avx512vpopcntdq_bit = 1 << 14; ///< @private bit 14 of ECX for EAX=0x7
+  static uint32_t cpuid_sse42_bit = 1 << 20;    ///< @private bit 20 of ECX for EAX=0x1
+  static uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit  1 of ECX for EAX=0x1
+  // EAX=0x7 with ECX=0x0: feature bits are read from EBX and ECX below
+  eax = 0x7;
+  ecx = 0x0;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  if (ebx & cpuid_avx2_bit) {
+    host_isa |= CROARING_AVX2;
+  }
+  if (ebx & cpuid_bmi1_bit) {
+    host_isa |= CROARING_BMI1;
+  }
+
+  if (ebx & cpuid_bmi2_bit) {
+    host_isa |= CROARING_BMI2;
+  }
+  
+  if (ebx & cpuid_avx512f_bit) {
+    host_isa |= CROARING_AVX512F;
+  }
+  
+  if (ebx & cpuid_avx512bw_bit) {
+    host_isa |= CROARING_AVX512BW;
+  }
+  
+  if (ebx & cpuid_avx512dq_bit) {
+    host_isa |= CROARING_AVX512DQ;
+  }
+  
+  if (ecx & cpuid_avx512vbmi2_bit) {
+    host_isa |= CROARING_AVX512VBMI2;
+  }
+  
+  if (ecx & cpuid_avx512bitalg_bit) {
+    host_isa |= CROARING_AVX512BITALG;
+  }
+  
+  if (ecx & cpuid_avx512vpopcntdq_bit) {
+    host_isa |= CROARING_AVX512VPOPCNTDQ;
+  }
+  
+  // EAX=0x1: feature bits are read from ECX below
+  eax = 0x1;
+  cpuid(&eax, &ebx, &ecx, &edx);
+
+  if (ecx & cpuid_sse42_bit) {
+    host_isa |= CROARING_SSE42;
+  }
+
+  if (ecx & cpuid_pclmulqdq_bit) {
+    host_isa |= CROARING_PCLMULQDQ;
+  }
+
+  return host_isa;
+}
+
+#endif // end SIMD extension detection code
+
+
+#if defined(__x86_64__) || defined(_M_AMD64) // x64
+
+#if defined(__cplusplus)
+static inline uint32_t croaring_detect_supported_architectures() {
+    // Cached in a function-local static; its one-time initialization is thread-safe as per the C++11 standard.
+    static uint32_t buffer = dynamic_croaring_detect_supported_architectures();
+    return buffer;
+}
+#elif CROARING_VISUAL_STUDIO
+// Visual Studio does not support C11 atomics, so a plain static caches the mask.
+static inline uint32_t croaring_detect_supported_architectures() {
+    static int buffer = CROARING_UNINITIALIZED;  // NOTE(review): unsynchronized; concurrent first calls may each run the detection
+    if (buffer == CROARING_UNINITIALIZED) {
+      buffer = dynamic_croaring_detect_supported_architectures();
+    }
+    return buffer;
+}
+#else // plain C compiler other than Visual Studio: C11 atomics are used
+#include <stdatomic.h>
+uint32_t croaring_detect_supported_architectures() {  // NOTE(review): external linkage, unlike the two static inline variants above; confirm intended
+    // The cache is an _Atomic object, used here for thread safety.
+    static _Atomic uint32_t buffer = CROARING_UNINITIALIZED;
+    if (buffer == CROARING_UNINITIALIZED) {
+      // Atomicity is sufficient: racing threads may each run the detection; presumably they all store the same mask.
+      buffer = dynamic_croaring_detect_supported_architectures();
+    }
+    return buffer;
+}
+#endif // __cplusplus / CROARING_VISUAL_STUDIO
+
+#ifdef ROARING_DISABLE_AVX
+// ROARING_DISABLE_AVX build: report that no SIMD instruction set may be used.
+int croaring_hardware_support() {
+    return 0;
+}
+
+#elif defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VBMI2__) && defined(__AVX512BITALG__) && defined(__AVX512VPOPCNTDQ__)
+int croaring_hardware_support() {  // every required AVX-512 subset is enabled at compile time: no runtime detection
+    return ROARING_SUPPORTS_AVX2 | ROARING_SUPPORTS_AVX512;
+}
+#elif defined(__AVX2__)
+// __AVX2__ build: AVX2 is guaranteed by the compiler flags; only AVX-512 is detected at runtime.
+int croaring_hardware_support() {
+  static int support = 0xFFFFFFF;  // sentinel: mask not computed yet
+  if(support == 0xFFFFFFF) {
+    bool avx512_support = false;
+#if CROARING_COMPILER_SUPPORTS_AVX512
+    avx512_support =  ( (croaring_detect_supported_architectures() & CROARING_AVX512_REQUIRED)
+	                        == CROARING_AVX512_REQUIRED);
+#endif
+    support = ROARING_SUPPORTS_AVX2 | (avx512_support ? ROARING_SUPPORTS_AVX512 : 0);
+  }
+  return support;
+}
+#else
+// Generic x64 build: AVX2 and AVX-512 availability are both detected at runtime, then cached.
+int croaring_hardware_support() {
+  static int cached_mask = 0xFFFFFFF;  // sentinel: mask not computed yet
+  if (cached_mask == 0xFFFFFFF) {
+    const uint32_t isa = croaring_detect_supported_architectures();
+    int mask = ((isa & CROARING_AVX2) == CROARING_AVX2) ? ROARING_SUPPORTS_AVX2 : 0;
+#if CROARING_COMPILER_SUPPORTS_AVX512
+    if ((isa & CROARING_AVX512_REQUIRED) == CROARING_AVX512_REQUIRED) { mask |= ROARING_SUPPORTS_AVX512; }
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+    cached_mask = mask;
+  }
+  return cached_mask;
+}
+#endif
+
+#endif // defined(__x86_64__) || defined(_M_AMD64) // x64
+#ifdef __cplusplus
+} } }  // extern "C" { namespace roaring { namespace internal {
+#endif
\ No newline at end of file
diff --git a/src/roaring.c b/src/roaring.c
index 6557ff45a..234356e0f 100644
--- a/src/roaring.c
+++ b/src/roaring.c
@@ -2790,7 +2790,6 @@ uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1,
               length2 = x2->high_low_container.size;
     uint64_t answer = 0;
     int pos1 = 0, pos2 = 0;
-
     while (pos1 < length1 && pos2 < length2) {
         const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
         const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 41fe0e6c2..28177c01a 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -40,8 +40,3 @@ endif()
 
 
 configure_file(${CMAKE_SOURCE_DIR}/tools/cmake/CTestCustom.cmake ${CMAKE_BINARY_DIR})
-
-set(BUILD_STATIC_LIB ON)
-import_dependency(vendor/cmocka https://cmocka.org/files/1.1/cmocka-1.1.5.tar.xz)
-add_subdirectory(vendor/cmocka)
-
diff --git a/tests/bitset_container_unit.c b/tests/bitset_container_unit.c
index b92279893..4e3f1bc97 100644
--- a/tests/bitset_container_unit.c
+++ b/tests/bitset_container_unit.c
@@ -116,8 +116,8 @@ DEFINE_TEST(and_or_test) {
 
     size_t max_value = 60000;
 
-    size_t b1_count = 0;
-    size_t bi_count = 0;
+    int b1_count = 0;
+    int bi_count = 0;
     for (size_t x = 0; x < max_value; x += 3) {
         bitset_container_set(B1, x);
         bitset_container_set(BI, x);
@@ -132,7 +132,7 @@ DEFINE_TEST(and_or_test) {
     assert_true(bitset_container_compute_cardinality(B1) == b1_count);
     assert_true(bitset_container_compute_cardinality(BI) == bi_count);
 
-    size_t b2_count = 0;
+    int b2_count = 0;
     // important: 62 is not divisible by 3
     for (size_t x = 0; x < max_value; x += 62) {
         bi_count += !bitset_container_get(BI, x);
@@ -144,7 +144,7 @@ DEFINE_TEST(and_or_test) {
 
     assert_true(bitset_container_compute_cardinality(B2) == b2_count);
     assert_true(bitset_container_compute_cardinality(BI) == bi_count);
-    size_t bo_count = 0;
+    int bo_count = 0;
     for (size_t x = 0; x < max_value; x += 62 * 3) {
         bitset_container_set(BO, x);
         bo_count++;
@@ -168,7 +168,7 @@ DEFINE_TEST(and_or_test) {
     bitset_container_printf(B1);  // does it crash?
     bitset_container_printf(B2);  // does it crash?
     bitset_container_printf(BI);  // does it crash?
-    size_t interc = 0;
+    int interc = 0;
     for (size_t x = 0; x < max_value; x ++) {
         bool in1 = bitset_container_get(B1, x);
         bool in2 = bitset_container_get(B2, x);
diff --git a/tests/cbitset_unit.c b/tests/cbitset_unit.c
index 512854410..d1b3f8edd 100644
--- a/tests/cbitset_unit.c
+++ b/tests/cbitset_unit.c
@@ -76,10 +76,10 @@ void test_shift_left() {
         for (size_t k = s1; k < s2; ++k) {
             bitset_set(b, power * k);
         }
-        size_t mycount = bitset_count(b);
+        int mycount = bitset_count(b);
         assert_true(compute_cardinality(b) == mycount);
         bitset_shift_left(b, sh);
-        assert_true(bitset_count(b) == mycount);
+        assert_true(bitset_count(b) == (size_t)mycount);
         assert_true(compute_cardinality(b) == mycount);
         for (size_t k = s1; k < s2; ++k) {
             assert_true(bitset_get(b, power * k + sh));
diff --git a/tools/cmake/FindCTargets.cmake b/tools/cmake/FindCTargets.cmake
index 97f2b64d6..341f9b2c2 100644
--- a/tools/cmake/FindCTargets.cmake
+++ b/tools/cmake/FindCTargets.cmake
@@ -1,6 +1,10 @@
 if (CMAKE_VERSION VERSION_GREATER 3.0.0)
   cmake_policy(VERSION 3.0.0)
 endif ()
+include(${PROJECT_SOURCE_DIR}/tools/cmake/Import.cmake)
+set(BUILD_STATIC_LIB ON)
+import_dependency(cmocka clibs/cmocka  f5e2cd7)
+add_dependency(cmocka)
 
 function(add_c_test TEST_NAME)
   if(ROARING_BUILD_C_TESTS_AS_CPP)  # under C++, container_t* != void*
@@ -9,8 +13,7 @@ function(add_c_test TEST_NAME)
 
   add_executable(${TEST_NAME} ${TEST_NAME}.c)
 
-  include_directories(${TEST_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/vendor/cmocka)
-  target_link_libraries(${TEST_NAME} ${ROARING_LIB_NAME} cmocka-static)
+  target_link_libraries(${TEST_NAME} roaring cmocka-static)
 
   add_test(${TEST_NAME} ${TEST_NAME})
 endfunction(add_c_test)
@@ -26,8 +29,7 @@ if (CMAKE_VERSION VERSION_GREATER 2.8.10)
     endif()
     target_include_directories(${TEST_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/cpp)
 
-    include_directories(${TEST_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/vendor/cmocka)
-    target_link_libraries(${TEST_NAME} ${ROARING_LIB_NAME} cmocka-static)
+    target_link_libraries(${TEST_NAME} roaring cmocka-static)
 
     add_test(${TEST_NAME} ${TEST_NAME})
   endfunction(add_cpp_test)
@@ -39,12 +41,12 @@ endif()
 
 function(add_c_benchmark BENCH_NAME)
   add_executable(${BENCH_NAME} ${BENCH_NAME}.c)
-  target_link_libraries(${BENCH_NAME} ${ROARING_LIB_NAME})
+  target_link_libraries(${BENCH_NAME} roaring)
 endfunction(add_c_benchmark)
 
 function(add_cpp_benchmark BENCH_NAME)
   add_executable(${BENCH_NAME} ${BENCH_NAME}.cpp)
-  target_link_libraries(${BENCH_NAME} ${ROARING_LIB_NAME})
+  target_link_libraries(${BENCH_NAME} roaring)
   if(ROARING_EXCEPTIONS)
     target_compile_definitions(${BENCH_NAME} PUBLIC ROARING_EXCEPTIONS=1)
   else()
diff --git a/tools/cmake/Import.cmake b/tools/cmake/Import.cmake
index a79ed8f58..2b4e3e755 100644
--- a/tools/cmake/Import.cmake
+++ b/tools/cmake/Import.cmake
@@ -1,10 +1,9 @@
-# Based on github.com/simdjson/simdjson/blob/master/dependencies/import.cmocka by @friendlyanon
+set(dep_root "${PROJECT_SOURCE_DIR}/dependencies/.cache")
 
-set(dep_root "${CMAKE_CURRENT_SOURCE_DIR}/.cache")
 
-function(import_dependency NAME URL)
-  message(STATUS "Importing ${NAME} (${URL})")
-  set(target "${CMAKE_CURRENT_SOURCE_DIR}/${NAME}")
+function(import_dependency NAME GITHUB_REPO COMMIT)
+  message(STATUS "Importing ${NAME} (${GITHUB_REPO}@${COMMIT})")
+  set(target "${dep_root}/${NAME}")
 
   # If the folder exists in the cache, then we assume that everything is as
   # should be and do nothing
@@ -13,12 +12,12 @@ function(import_dependency NAME URL)
     return()
   endif()
 
-  set(archive "${dep_root}/archive.tar.xz")
+  set(zip_url "https://github.com/${GITHUB_REPO}/archive/${COMMIT}.zip")
+  set(archive "${dep_root}/archive.zip")
   set(dest "${dep_root}/_extract")
 
-  file(DOWNLOAD "${URL}" "${archive}")
+  file(DOWNLOAD "${zip_url}" "${archive}")
   file(MAKE_DIRECTORY "${dest}")
-  file(GLOB dir LIST_DIRECTORIES YES "${dep_root}/*")
   execute_process(
           WORKING_DIRECTORY "${dest}"
           COMMAND "${CMAKE_COMMAND}" -E tar xf "${archive}")
@@ -32,3 +31,20 @@ function(import_dependency NAME URL)
 
   set("${NAME}_SOURCE_DIR" "${target}" PARENT_SCOPE)
 endfunction()
+
+# Delegates to the dependency
+macro(add_dependency NAME)
+  if(NOT DEFINED "${NAME}_SOURCE_DIR")
+    message(FATAL_ERROR "Missing ${NAME}_SOURCE_DIR variable")
+  endif()
+
+  add_subdirectory("${${NAME}_SOURCE_DIR}" "${PROJECT_BINARY_DIR}/_deps/${NAME}" EXCLUDE_FROM_ALL)
+endmacro()
+
+function(set_off NAME)
+  set("${NAME}" OFF CACHE INTERNAL "")
+endfunction()
+
+function(set_on NAME)
+  set("${NAME}" ON CACHE INTERNAL "")
+endfunction()
\ No newline at end of file

From e9c28ecde95f4f4b6940b8e89ef54c11658bbc59 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Mon, 3 Apr 2023 08:56:04 -0400
Subject: [PATCH 105/162] fixing
 https://github.com/RoaringBitmap/CRoaring/issues/461

---
 src/isadetection.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/isadetection.c b/src/isadetection.c
index 272577aa0..4b9f90a6c 100644
--- a/src/isadetection.c
+++ b/src/isadetection.c
@@ -242,7 +242,7 @@ int croaring_hardware_support() {
     avx512_support =  ( (croaring_detect_supported_architectures() & CROARING_AVX512_REQUIRED)
 	                        == CROARING_AVX512_REQUIRED);
 #endif
-    support = ROARING_SUPPORTS_AVX2 | (croaring_has_avx512() ? ROARING_SUPPORTS_AVX512 : 0);
+    support = ROARING_SUPPORTS_AVX2 | (avx512_support ? ROARING_SUPPORTS_AVX512 : 0);
   }
   return support;
 }

From 6f1a94e38765d230c94068953e7d4c78ea705da2 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 3 Apr 2023 09:26:06 -0400
Subject: [PATCH 106/162] Update README.md

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2d91e0668..170cfcd2e 100644
--- a/README.md
+++ b/README.md
@@ -186,6 +186,7 @@ We have microbenchmarks constructed with the Google Benchmarks.
 Under Linux or macOS, you may run them as follows:
 
 ```
+cmake -B build
 cmake --build build
 ./build/microbenchmarks/bench
 ```
@@ -201,7 +202,8 @@ You may disable some functionality for the purpose of benchmarking. For example,
 benchmark the code without AVX-512 even if both your processor and compiler supports it:
 
 ```
-cmake --buildnoavx512 -D ROARING_DISABLE_AVX512=OFF
+cmake -B buildnoavx512 -D ROARING_DISABLE_AVX512=OFF
+cmake --build buildnoavx512
 ./buildnoavx512/microbenchmarks/bench
 ```
 

From 1b5aa991b817bc1f89ca00958b5aa13e639ea113 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 3 Apr 2023 10:49:40 -0400
Subject: [PATCH 107/162] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 170cfcd2e..2e8e25c82 100644
--- a/README.md
+++ b/README.md
@@ -202,7 +202,7 @@ You may disable some functionality for the purpose of benchmarking. For example,
 benchmark the code without AVX-512 even if both your processor and compiler supports it:
 
 ```
-cmake -B buildnoavx512 -D ROARING_DISABLE_AVX512=OFF
+cmake -B buildnoavx512 -D ROARING_DISABLE_AVX512=ON
 cmake --build buildnoavx512
 ./buildnoavx512/microbenchmarks/bench
 ```

From c04bd8ab909c422d2360e43371fdd4301af53535 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 3 Apr 2023 15:06:40 +0000
Subject: [PATCH 108/162] Various tweaks.

---
 README.md                     | 13 ++++++++--
 include/roaring/array_util.h  | 16 ++++++++++++
 microbenchmarks/bench.cpp     |  2 +-
 src/CMakeLists.txt            | 13 ++++++++++
 src/array_util.c              | 46 +++++++++++++++++++++++++++++++++++
 src/containers/array.c        | 21 +++++++++++++++-
 src/isadetection.c            |  2 +-
 tools/cmake/FindOptions.cmake | 11 ---------
 8 files changed, 108 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 2e8e25c82..285515824 100644
--- a/README.md
+++ b/README.md
@@ -198,8 +198,9 @@ We have several data sets and you may pick others:
 ./build/microbenchmarks/bench benchmarks/realdata/wikileaks-noquotes 
 ```
 
-You may disable some functionality for the purpose of benchmarking. For example, you could
-benchmark the code without AVX-512 even if both your processor and compiler supports it:
+You may disable some functionality for the purpose of benchmarking. For example, assuming you
+have an x64 processor, you could benchmark the code without AVX-512 even if both your processor 
+and compiler supports it:
 
 ```
 cmake -B buildnoavx512 -D ROARING_DISABLE_AVX512=ON
@@ -207,6 +208,14 @@ cmake --build buildnoavx512
 ./buildnoavx512/microbenchmarks/bench
 ```
 
+You can benchmark without AVX or AVX-512 as well:
+
+```
+cmake -B buildnoavx -D ROARING_DISABLE_AVX=ON
+cmake --build buildnoavx
+./buildnoavx/microbenchmarks/bench
+```
+
 # Custom memory allocators
 For general users, CRoaring would apply default allocator without extra codes. But global memory hook is also provided for those who want a custom memory allocator. Here is an example:
 ```C
diff --git a/include/roaring/array_util.h b/include/roaring/array_util.h
index 46e57b715..ee088bfd3 100644
--- a/include/roaring/array_util.h
+++ b/include/roaring/array_util.h
@@ -6,6 +6,12 @@
 
 #include <roaring/portability.h>
 
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+
 #ifdef __cplusplus
 extern "C" { namespace roaring { namespace internal {
 #endif
@@ -128,6 +134,16 @@ int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
 int32_t intersect_vector16_inplace(uint16_t *__restrict__ A, size_t s_a,
                            const uint16_t *__restrict__ B, size_t s_b);
 
+/**
+ * Take an array container and write it out to a 32-bit array, using base
+ * as the offset.
+ */
+int array_container_to_uint32_array_vector16(void *vout, const uint16_t* array, size_t cardinality,
+                                    uint32_t base);
+#if CROARING_COMPILER_SUPPORTS_AVX512
+int avx512_array_container_to_uint32_array(void *vout, const uint16_t* array, size_t cardinality,
+                                    uint32_t base);
+#endif
 /**
  * Compute the cardinality of the intersection using SSE4 instructions
  */
diff --git a/microbenchmarks/bench.cpp b/microbenchmarks/bench.cpp
index 1616e5b3d..3f013b8cd 100644
--- a/microbenchmarks/bench.cpp
+++ b/microbenchmarks/bench.cpp
@@ -152,7 +152,7 @@ struct compute_cardinality {
 };
 
 auto ComputeCardinality = BasicBench<compute_cardinality>;
-BENCHMARK(ComputeCardinality)->MinTime(2);
+BENCHMARK(ComputeCardinality);
 
 int main(int argc, char **argv) {
     const char *dir_name;
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8ad9d7835..16007664d 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -45,6 +45,19 @@ if(ROARING_DISABLE_AVX512)
   target_compile_definitions(roaring PUBLIC CROARING_COMPILER_SUPPORTS_AVX512=0)
 endif(ROARING_DISABLE_AVX512)
 
+if(ROARING_DISABLE_AVX)
+  target_compile_definitions(roaring PUBLIC ROARING_DISABLE_AVX=1)
+endif(ROARING_DISABLE_AVX)
+
+if(ROARING_DISABLE_X64)
+  target_compile_definitions(roaring PUBLIC ROARING_DISABLE_X64=1)
+endif(ROARING_DISABLE_X64)
+
+if(ROARING_DISABLE_NEON)
+  target_compile_definitions(roaring PUBLIC DISABLENEON=1)
+endif(ROARING_DISABLE_NEON)
+
+
 target_include_directories(roaring
   PUBLIC
    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
diff --git a/src/array_util.c b/src/array_util.c
index f19b9ac5a..2c74e24b3 100644
--- a/src/array_util.c
+++ b/src/array_util.c
@@ -452,6 +452,52 @@ int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
     return (int32_t)count;
 }
 
+ALLOW_UNALIGNED
+int array_container_to_uint32_array_vector16(void *vout, const uint16_t* array, size_t cardinality,
+                                    uint32_t base) {
+    int outpos = 0;
+    uint32_t *out = (uint32_t *)vout;
+    size_t i = 0;
+    for ( ;i + sizeof(__m128i)/sizeof(uint16_t) <= cardinality; i += sizeof(__m128i)/sizeof(uint16_t)) {
+        __m128i vinput = _mm_loadu_si128((const __m128i*) (array + i));
+        __m256i voutput = _mm256_add_epi32(_mm256_cvtepu16_epi32(vinput), _mm256_set1_epi32(base));
+        _mm256_storeu_si256((__m256i*)(out + outpos), voutput);
+        outpos += sizeof(__m256i)/sizeof(uint32_t);
+    }
+    for ( ; i < cardinality; ++i) {
+        const uint32_t val = base + array[i];
+        memcpy(out + outpos, &val,
+               sizeof(uint32_t));  // should be compiled as a MOV on x64
+        outpos++;
+    }
+    return outpos;
+}
+#if CROARING_COMPILER_SUPPORTS_AVX512
+CROARING_TARGET_AVX512
+
+ALLOW_UNALIGNED
+int avx512_array_container_to_uint32_array(void *vout, const uint16_t* array, size_t cardinality,
+                                    uint32_t base) {
+    int outpos = 0;
+    uint32_t *out = (uint32_t *)vout;
+    size_t i = 0;
+    for ( ;i + sizeof(__m256i)/sizeof(uint16_t) <= cardinality; i += sizeof(__m256i)/sizeof(uint16_t)) {
+        __m256i vinput = _mm256_loadu_si256((const __m256i*) (array + i));
+        __m512i voutput = _mm512_add_epi32(_mm512_cvtepu16_epi32(vinput), _mm512_set1_epi32(base));
+        _mm512_storeu_si512((__m512i*)(out + outpos), voutput);
+        outpos += sizeof(__m512i)/sizeof(uint32_t);
+    }
+    for ( ; i < cardinality; ++i) {
+        const uint32_t val = base + array[i];
+        memcpy(out + outpos, &val,
+               sizeof(uint32_t));  // should be compiled as a MOV on x64
+        outpos++;
+    }
+    return outpos;
+}
+CROARING_UNTARGET_AVX512
+#endif
+
 int32_t intersect_vector16_inplace(uint16_t *__restrict__ A, size_t s_a,
                            const uint16_t *__restrict__ B, size_t s_b) {
     size_t count = 0;
diff --git a/src/containers/array.c b/src/containers/array.c
index 199c20ceb..2d812ef77 100644
--- a/src/containers/array.c
+++ b/src/containers/array.c
@@ -9,6 +9,12 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512
+#endif
+
 #ifdef __cplusplus
 extern "C" { namespace roaring { namespace internal {
 #endif
@@ -388,9 +394,22 @@ void array_container_intersection_inplace(array_container_t *src_1,
 ALLOW_UNALIGNED
 int array_container_to_uint32_array(void *vout, const array_container_t *cont,
                                     uint32_t base) {
+
+#ifdef CROARING_IS_X64
+    int support = croaring_hardware_support();
+#if CROARING_COMPILER_SUPPORTS_AVX512
+    if (support & ROARING_SUPPORTS_AVX512) {
+        return avx512_array_container_to_uint32_array(vout, cont->array, cont->cardinality, base);
+    }
+#endif
+    if (support & ROARING_SUPPORTS_AVX2) {
+        return array_container_to_uint32_array_vector16(vout, cont->array, cont->cardinality, base);
+    }
+#endif // CROARING_IS_X64
     int outpos = 0;
     uint32_t *out = (uint32_t *)vout;
-    for (int i = 0; i < cont->cardinality; ++i) {
+    size_t i = 0;
+    for ( ; i < (size_t)cont->cardinality; ++i) {
         const uint32_t val = base + cont->array[i];
         memcpy(out + outpos, &val,
                sizeof(uint32_t));  // should be compiled as a MOV on x64
diff --git a/src/isadetection.c b/src/isadetection.c
index 4b9f90a6c..f75b8487c 100644
--- a/src/isadetection.c
+++ b/src/isadetection.c
@@ -87,7 +87,7 @@ enum croaring_instruction_set {
 };
 
 #if CROARING_COMPILER_SUPPORTS_AVX512
-static unsigned int CROARING_AVX512_REQUIRED = (CROARING_AVX512F | CROARING_AVX512DQ | CROARING_AVX512BW | CROARING_AVX512VBMI2 | CROARING_AVX512BITALG | CROARING_AVX512VPOPCNTDQ);
+unsigned int CROARING_AVX512_REQUIRED = (CROARING_AVX512F | CROARING_AVX512DQ | CROARING_AVX512BW | CROARING_AVX512VBMI2 | CROARING_AVX512BITALG | CROARING_AVX512VPOPCNTDQ);
 #endif
 
 #if defined(__x86_64__) || defined(_M_AMD64) // x64
diff --git a/tools/cmake/FindOptions.cmake b/tools/cmake/FindOptions.cmake
index 0ee4e3752..f7b8f9675 100644
--- a/tools/cmake/FindOptions.cmake
+++ b/tools/cmake/FindOptions.cmake
@@ -16,17 +16,6 @@ endif()
 if((NOT MSVC) AND ROARING_ARCH)
 set(OPT_FLAGS "-march=${ROARING_ARCH}")
 endif()
-if(ROARING_DISABLE_X64)
-  # we can manually disable any optimization for x64
-  set (OPT_FLAGS "${OPT_FLAGS} -DROARING_DISABLE_X64" )
-endif()
-if(ROARING_DISABLE_AVX)
-   # we can manually disable AVX by defining DISABLEAVX
-   set (OPT_FLAGS "${OPT_FLAGS} -DROARING_DISABLE_AVX" )
- endif()
-if(ROARING_DISABLE_NEON)
-  set (OPT_FLAGS "${OPT_FLAGS} -DDISABLENEON" )
-endif()
 
 if(FORCE_AVX) # some compilers like clang do not automagically define __AVX2__ and __BMI2__ even when the hardware supports it
 if(NOT MSVC)

From 8c93fa3abe0dd2824e8e9be17c473060b1c33ace Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 3 Apr 2023 11:14:08 -0400
Subject: [PATCH 109/162] Removing old code

---
 .github/workflows/ubuntu-18-ci.yml          | 27 ------------------
 .github/workflows/ubuntu-oldclang-18-ci.yml | 31 ---------------------
 2 files changed, 58 deletions(-)
 delete mode 100644 .github/workflows/ubuntu-18-ci.yml
 delete mode 100644 .github/workflows/ubuntu-oldclang-18-ci.yml

diff --git a/.github/workflows/ubuntu-18-ci.yml b/.github/workflows/ubuntu-18-ci.yml
deleted file mode 100644
index 225533734..000000000
--- a/.github/workflows/ubuntu-18-ci.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-name: Ubuntu-18-CI
-
-'on':
-  - push
-  - pull_request
-
-permissions:
-  contents: read
-
-jobs:
-  ci:
-    name: ubuntu-gcc
-    runs-on: ubuntu-18.04
-
-    env:
-      CC: gcc
-      CXX: g++
-
-    steps: 
-      - uses: actions/checkout@v2
-      - name: Build and Test
-        run: |
-          mkdir build
-          cd build
-          cmake  ..
-          cmake --build . 
-          ctest . --output-on-failure
diff --git a/.github/workflows/ubuntu-oldclang-18-ci.yml b/.github/workflows/ubuntu-oldclang-18-ci.yml
deleted file mode 100644
index 8fc5825d9..000000000
--- a/.github/workflows/ubuntu-oldclang-18-ci.yml
+++ /dev/null
@@ -1,31 +0,0 @@
-name: Ubuntu-CI (old llvm)
-
-'on':
-  - push
-  - pull_request
-
-permissions:
-  contents: read
-
-jobs:
-  ci:
-    name: ubuntu-clangold-gcc
-    runs-on: ubuntu-18.04
-
-    env:
-      CC: clang-7
-      CXX: clang++-7
-
-    steps: 
-      - uses: actions/checkout@v2
-      - name: install clang 7
-        run: |
-          sudo apt update
-          sudo apt install clang-7
-      - name: Build and Test
-        run: |
-          mkdir build
-          cd build
-          cmake  ..
-          cmake --build . 
-          ctest . --output-on-failure

From c3011e41118380089fa51cb849ae4a146b68b316 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 3 Apr 2023 18:07:28 +0000
Subject: [PATCH 110/162] Moving AVX-512 to the end.

---
 src/array_util.c | 54 ++++++++++++++++++++++++++----------------------
 1 file changed, 29 insertions(+), 25 deletions(-)

diff --git a/src/array_util.c b/src/array_util.c
index 2c74e24b3..3f4f4bf17 100644
--- a/src/array_util.c
+++ b/src/array_util.c
@@ -472,31 +472,6 @@ int array_container_to_uint32_array_vector16(void *vout, const uint16_t* array,
     }
     return outpos;
 }
-#if CROARING_COMPILER_SUPPORTS_AVX512
-CROARING_TARGET_AVX512
-
-ALLOW_UNALIGNED
-int avx512_array_container_to_uint32_array(void *vout, const uint16_t* array, size_t cardinality,
-                                    uint32_t base) {
-    int outpos = 0;
-    uint32_t *out = (uint32_t *)vout;
-    size_t i = 0;
-    for ( ;i + sizeof(__m256i)/sizeof(uint16_t) <= cardinality; i += sizeof(__m256i)/sizeof(uint16_t)) {
-        __m256i vinput = _mm256_loadu_si256((const __m256i*) (array + i));
-        __m512i voutput = _mm512_add_epi32(_mm512_cvtepu16_epi32(vinput), _mm512_set1_epi32(base));
-        _mm512_storeu_si512((__m512i*)(out + outpos), voutput);
-        outpos += sizeof(__m512i)/sizeof(uint32_t);
-    }
-    for ( ; i < cardinality; ++i) {
-        const uint32_t val = base + array[i];
-        memcpy(out + outpos, &val,
-               sizeof(uint32_t));  // should be compiled as a MOV on x64
-        outpos++;
-    }
-    return outpos;
-}
-CROARING_UNTARGET_AVX512
-#endif
 
 int32_t intersect_vector16_inplace(uint16_t *__restrict__ A, size_t s_a,
                            const uint16_t *__restrict__ B, size_t s_b) {
@@ -2161,6 +2136,35 @@ bool memequals(const void *s1, const void *s2, size_t n) {
 #endif
 }
 
+
+#ifdef CROARING_IS_X64
+#if CROARING_COMPILER_SUPPORTS_AVX512
+CROARING_TARGET_AVX512
+ALLOW_UNALIGNED
+int avx512_array_container_to_uint32_array(void *vout, const uint16_t* array, size_t cardinality,
+                                    uint32_t base) {
+    int outpos = 0;
+    uint32_t *out = (uint32_t *)vout;
+    size_t i = 0;
+    for ( ;i + sizeof(__m256i)/sizeof(uint16_t) <= cardinality; i += sizeof(__m256i)/sizeof(uint16_t)) {
+        __m256i vinput = _mm256_loadu_si256((const __m256i*) (array + i));
+        __m512i voutput = _mm512_add_epi32(_mm512_cvtepu16_epi32(vinput), _mm512_set1_epi32(base));
+        _mm512_storeu_si512((__m512i*)(out + outpos), voutput);
+        outpos += sizeof(__m512i)/sizeof(uint32_t);
+    }
+    for ( ; i < cardinality; ++i) {
+        const uint32_t val = base + array[i];
+        memcpy(out + outpos, &val,
+               sizeof(uint32_t));  // should be compiled as a MOV on x64
+        outpos++;
+    }
+    return outpos;
+}
+CROARING_UNTARGET_AVX512
+#endif // #if CROARING_COMPILER_SUPPORTS_AVX512
+#endif // #ifdef CROARING_IS_X64
+
+
 #ifdef __cplusplus
 } } }  // extern "C" { namespace roaring { namespace internal {
 #endif

From a22a1cdac65216aa328f7ae337657770b3d8988b Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 3 Apr 2023 18:10:14 +0000
Subject: [PATCH 111/162] Simplification.

---
 benchmarks/array_container_benchmark.c  |  2 +-
 benchmarks/bitset_container_benchmark.c |  2 +-
 benchmarks/run_container_benchmark.c    |  2 +-
 include/roaring/bitset_util.h           |  2 +-
 include/roaring/misc/configreport.h     |  2 +-
 include/roaring/portability.h           |  2 +-
 src/array_util.c                        | 14 +++++++-------
 src/bitset_util.c                       | 12 ++++++------
 src/containers/array.c                  | 14 +++++++-------
 src/containers/bitset.c                 | 14 +++++++-------
 10 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/benchmarks/array_container_benchmark.c b/benchmarks/array_container_benchmark.c
index 903d0e4ac..fc6a2f196 100644
--- a/benchmarks/array_container_benchmark.c
+++ b/benchmarks/array_container_benchmark.c
@@ -23,7 +23,7 @@ void array_cache_flush(array_container_t* B) { (void)B; }
 // tries to put the array in cache
 void array_cache_prefetch(array_container_t* B) {
 #if !CROARING_REGULAR_VISUAL_STUDIO
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
     const int32_t CACHELINESIZE =
         computecacheline();  // 64 bytes per cache line
 #else
diff --git a/benchmarks/bitset_container_benchmark.c b/benchmarks/bitset_container_benchmark.c
index bda9cc9d9..932398a97 100644
--- a/benchmarks/bitset_container_benchmark.c
+++ b/benchmarks/bitset_container_benchmark.c
@@ -29,7 +29,7 @@ void bitset_cache_flush(bitset_container_t* B) { (void)B; }
 // tries to put array of words in cache
 void bitset_cache_prefetch(bitset_container_t* B) {
 #if !CROARING_REGULAR_VISUAL_STUDIO
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
     const int32_t CACHELINESIZE =
         computecacheline();  // 64 bytes per cache line
 #else
diff --git a/benchmarks/run_container_benchmark.c b/benchmarks/run_container_benchmark.c
index 9b7256358..e2fdc495a 100644
--- a/benchmarks/run_container_benchmark.c
+++ b/benchmarks/run_container_benchmark.c
@@ -23,7 +23,7 @@ void run_cache_flush(run_container_t* B) { (void)B; }
 // tries to put array in cache
 void run_cache_prefetch(run_container_t* B) {
 #if !CROARING_REGULAR_VISUAL_STUDIO
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
     const int32_t CACHELINESIZE =
         computecacheline();  // 64 bytes per cache line
 #else
diff --git a/include/roaring/bitset_util.h b/include/roaring/bitset_util.h
index 6b5207f96..32bc6798a 100644
--- a/include/roaring/bitset_util.h
+++ b/include/roaring/bitset_util.h
@@ -262,7 +262,7 @@ uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card,
 
 void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length);
 
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
 /***
  * BEGIN Harley-Seal popcount functions.
  */
diff --git a/include/roaring/misc/configreport.h b/include/roaring/misc/configreport.h
index 87a6aae8a..1ff937722 100644
--- a/include/roaring/misc/configreport.h
+++ b/include/roaring/misc/configreport.h
@@ -16,7 +16,7 @@
 extern "C" { namespace roaring { namespace misc {
 #endif
 
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
 // useful for basic info (0)
 static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
                                 unsigned int *ecx, unsigned int *edx) {
diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index a726b5ef4..f05206272 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -308,7 +308,7 @@ static inline int roaring_hamming(uint64_t x) {
 //
 
 // We are going to use runtime dispatch.
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
 #ifdef __clang__
 // clang does not have GCC push pop
 // warning: clang attribute push can't be used within a namespace in clang up
diff --git a/src/array_util.c b/src/array_util.c
index 3f4f4bf17..eb7dcbc49 100644
--- a/src/array_util.c
+++ b/src/array_util.c
@@ -23,7 +23,7 @@ extern "C" { namespace roaring { namespace internal {
 extern inline int32_t binarySearch(const uint16_t *array, int32_t lenarray,
                                    uint16_t ikey);
 
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
 // used by intersect_vector16
 ALIGNED(0x1000)
 static const uint8_t shuffle_mask16[] = {
@@ -1227,7 +1227,7 @@ int32_t xor_uint16(const uint16_t *array_1, int32_t card_1,
     return pos_out;
 }
 
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
 
 /***
  * start of the SIMD 16-bit union code
@@ -1980,7 +1980,7 @@ size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
 
 size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
                     size_t size_2, uint16_t *buffer) {
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
     if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {
         // compute union with smallest array first
       if (size_1 < size_2) {
@@ -2011,7 +2011,7 @@ size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *s
     }
 #endif
 }
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
 #if CROARING_COMPILER_SUPPORTS_AVX512
 CROARING_TARGET_AVX512
 static inline bool _avx512_memequals(const void *s1, const void *s2, size_t n) {
@@ -2119,7 +2119,7 @@ bool memequals(const void *s1, const void *s2, size_t n) {
     if (n == 0) {
         return true;
     }
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
     int support = croaring_hardware_support();
 #if CROARING_COMPILER_SUPPORTS_AVX512
     if( support & ROARING_SUPPORTS_AVX512 ) {
@@ -2137,7 +2137,7 @@ bool memequals(const void *s1, const void *s2, size_t n) {
 }
 
 
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
 #if CROARING_COMPILER_SUPPORTS_AVX512
 CROARING_TARGET_AVX512
 ALLOW_UNALIGNED
@@ -2162,7 +2162,7 @@ int avx512_array_container_to_uint32_array(void *vout, const uint16_t* array, si
 }
 CROARING_UNTARGET_AVX512
 #endif // #if CROARING_COMPILER_SUPPORTS_AVX512
-#endif // #ifdef CROARING_IS_X64
+#endif // #if CROARING_IS_X64
 
 
 #ifdef __cplusplus
diff --git a/src/bitset_util.c b/src/bitset_util.c
index 427d95901..7096b27b1 100644
--- a/src/bitset_util.c
+++ b/src/bitset_util.c
@@ -17,7 +17,7 @@ using namespace ::roaring::internal;
 extern "C" { namespace roaring { namespace api {
 #endif
 
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
 static uint8_t lengthTable[256] = {
     0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
     2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
@@ -32,7 +32,7 @@ static uint8_t lengthTable[256] = {
     4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
 #endif
 
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
 ALIGNED(32)
 static uint32_t vecDecodeTable[256][8] = {
     {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */
@@ -293,9 +293,9 @@ static uint32_t vecDecodeTable[256][8] = {
     {1, 2, 3, 4, 5, 6, 7, 8}  /* 0xFF (11111111) */
 };
 
-#endif  // #ifdef CROARING_IS_X64
+#endif  // #if CROARING_IS_X64
 
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
 // same as vecDecodeTable but in 16 bits
 ALIGNED(32)
 static uint16_t vecDecodeTable_uint16[256][8] = {
@@ -559,7 +559,7 @@ static uint16_t vecDecodeTable_uint16[256][8] = {
 
 #endif
 
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
 #if CROARING_COMPILER_SUPPORTS_AVX512
 CROARING_TARGET_AVX512
 const uint8_t vbmi2_table[64] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
@@ -763,7 +763,7 @@ size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__
     return outpos;
 }
 
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
 /*
  * Given a bitset containing "length" 64-bit words, write out the position
  * of all the set bits to "out" as 16-bit integers, values start at "base" (can
diff --git a/src/containers/array.c b/src/containers/array.c
index 2d812ef77..5d4017a7a 100644
--- a/src/containers/array.c
+++ b/src/containers/array.c
@@ -222,7 +222,7 @@ void array_container_andnot(const array_container_t *array_1,
                             array_container_t *out) {
     if (out->capacity < array_1->cardinality)
         array_container_grow(out, array_1->cardinality, false);
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
     if(( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) && (out != array_1) && (out != array_2)) {
       out->cardinality =
           difference_vector16(array_1->array, array_1->cardinality,
@@ -253,7 +253,7 @@ void array_container_xor(const array_container_t *array_1,
         array_container_grow(out, max_cardinality, false);
     }
 
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
     if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {
       out->cardinality =
         xor_vector16(array_1->array, array_1->cardinality, array_2->array,
@@ -284,7 +284,7 @@ void array_container_intersection(const array_container_t *array1,
     int32_t card_1 = array1->cardinality, card_2 = array2->cardinality,
             min_card = minimum_int32(card_1, card_2);
     const int threshold = 64;  // subject to tuning
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
     if (out->capacity < min_card) {
       array_container_grow(out, min_card + sizeof(__m128i) / sizeof(uint16_t),
         false);
@@ -302,7 +302,7 @@ void array_container_intersection(const array_container_t *array1,
         out->cardinality = intersect_skewed_uint16(
             array2->array, card_2, array1->array, card_1, out->array);
     } else {
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
        if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {
         out->cardinality = intersect_vector16(
             array1->array, card_1, array2->array, card_2, out->array);
@@ -330,7 +330,7 @@ int array_container_intersection_cardinality(const array_container_t *array1,
         return intersect_skewed_uint16_cardinality(array2->array, card_2,
                                                    array1->array, card_1);
     } else {
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
     if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {
         return intersect_vector16_cardinality(array1->array, card_1,
                                               array2->array, card_2);
@@ -376,7 +376,7 @@ void array_container_intersection_inplace(array_container_t *src_1,
         src_1->cardinality = intersect_skewed_uint16(
             src_2->array, card_2, src_1->array, card_1, src_1->array);
     } else {
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
         if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
             src_1->cardinality = intersect_vector16_inplace(
                 src_1->array, card_1, src_2->array, card_2);
@@ -395,7 +395,7 @@ ALLOW_UNALIGNED
 int array_container_to_uint32_array(void *vout, const array_container_t *cont,
                                     uint32_t base) {
 
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
     int support = croaring_hardware_support();
 #if CROARING_COMPILER_SUPPORTS_AVX512
     if (support & ROARING_SUPPORTS_AVX512) {
diff --git a/src/containers/bitset.c b/src/containers/bitset.c
index 86375ea0d..30352ee48 100644
--- a/src/containers/bitset.c
+++ b/src/containers/bitset.c
@@ -61,7 +61,7 @@ bitset_container_t *bitset_container_create(void) {
     }
 
     size_t align_size = 32;
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
     int support = croaring_hardware_support();
     if ( support & ROARING_SUPPORTS_AVX512 ) {
 	    // sizeof(__m512i) == 64
@@ -137,7 +137,7 @@ bitset_container_t *bitset_container_clone(const bitset_container_t *src) {
     }
 
     size_t align_size = 32;
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
     if ( croaring_hardware_support() & ROARING_SUPPORTS_AVX512 ) {
 	    // sizeof(__m512i) == 64
 	    align_size = 64;
@@ -243,7 +243,7 @@ bool bitset_container_intersect(const bitset_container_t *src_1,
 }
 
 
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
 #ifndef WORDS_IN_AVX2_REG
 #define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
 #endif
@@ -323,7 +323,7 @@ int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
 
 #endif // CROARING_IS_X64
 
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
 
 #define BITSET_CONTAINER_FN_REPEAT 8
 #ifndef WORDS_IN_AVX512_REG
@@ -932,7 +932,7 @@ int bitset_container_to_uint32_array(
     const bitset_container_t *bc,
     uint32_t base
 ){
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
    int support = croaring_hardware_support();
 #if CROARING_COMPILER_SUPPORTS_AVX512
    if(( support & ROARING_SUPPORTS_AVX512 ) &&  (bc->cardinality >= 8192))  // heuristic
@@ -1062,7 +1062,7 @@ bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, r
   return true;
 }
 
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
 #if CROARING_COMPILER_SUPPORTS_AVX512
 CROARING_TARGET_AVX512
 ALLOW_UNALIGNED
@@ -1109,7 +1109,7 @@ bool bitset_container_equals(const bitset_container_t *container1, const bitset_
       return true;
     }
   }
-#ifdef CROARING_IS_X64
+#if CROARING_IS_X64
   int support = croaring_hardware_support();
 #if CROARING_COMPILER_SUPPORTS_AVX512
   if( support & ROARING_SUPPORTS_AVX512 ) {

From 4fdf6ffbc3ee64ae7f909a7ebc1bc9b887dafdef Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 3 Apr 2023 14:52:31 -0400
Subject: [PATCH 112/162] New version.

---
 CMakeLists.txt                    | 8 ++++----
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 6 +++---
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9b41269f4..27cac4b88 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,10 +17,10 @@ if(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSIO
 endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 1)
-set(PROJECT_VERSION_MINOR 0)
-set(PROJECT_VERSION_PATCH 1)
-set(ROARING_LIB_VERSION "1.0.1" CACHE STRING "Roaring library version")
-set(ROARING_LIB_SOVERSION "8" CACHE STRING "Roaring library soversion")
+set(PROJECT_VERSION_MINOR 1)
+set(PROJECT_VERSION_PATCH 0)
+set(ROARING_LIB_VERSION "1.1.0" CACHE STRING "Roaring library version")
+set(ROARING_LIB_SOVERSION "9" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
 if(NOT ROARING_EXCEPTIONS)
diff --git a/doxygen b/doxygen
index 5d3a3b1df..8a1a6e961 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "1.0.1"
+PROJECT_NUMBER         = "1.1.0"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 037bcb883..6fb984b02 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "1.0.1"
+#define ROARING_VERSION "1.1.0"
 enum { 
     ROARING_VERSION_MAJOR = 1,
-    ROARING_VERSION_MINOR = 0,
-    ROARING_VERSION_REVISION = 1
+    ROARING_VERSION_MINOR = 1,
+    ROARING_VERSION_REVISION = 0
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 6e7aa193abd034b31b60904712ee73b284949944 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 4 Apr 2023 14:22:52 -0400
Subject: [PATCH 113/162] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 285515824..26cef1d6e 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 # CRoaring 
 
 [![Ubuntu-CI](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml/badge.svg)](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml) [![VS17-CI](https://github.com/RoaringBitmap/CRoaring/actions/workflows/vs17-ci.yml/badge.svg)](https://github.com/RoaringBitmap/CRoaring/actions/workflows/vs17-ci.yml)
+[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/croaring.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:croaring)
 
 [![Doxygen Documentation](https://img.shields.io/badge/docs-doxygen-green.svg)](http://roaringbitmap.github.io/CRoaring/)
 

From 09ed0f7899416b9e1dc5633c168546feec54e47c Mon Sep 17 00:00:00 2001
From: Justin Whear <justin.whear@gmail.com>
Date: Wed, 12 Apr 2023 12:37:33 -0700
Subject: [PATCH 114/162] fix missing semicolon (#463)

---
 src/isadetection.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/isadetection.c b/src/isadetection.c
index f75b8487c..0aefde1fd 100644
--- a/src/isadetection.c
+++ b/src/isadetection.c
@@ -230,7 +230,7 @@ int croaring_hardware_support() {
 
 #elif defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VBMI2__) && defined(__AVX512BITALG__) && defined(__AVX512VPOPCNTDQ__)
 int croaring_hardware_support() {
-    return  ROARING_SUPPORTS_AVX2 | ROARING_SUPPORTS_AVX512
+    return  ROARING_SUPPORTS_AVX2 | ROARING_SUPPORTS_AVX512;
 }
 #elif defined(__AVX2__)
 

From f7f12ea7728bc1d8cbcd1076e02c248644d7e769 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Wed, 12 Apr 2023 15:38:13 -0400
Subject: [PATCH 115/162] Bumping version.

---
 CMakeLists.txt                    | 4 ++--
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 27cac4b88..a3d1abf0e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 1)
 set(PROJECT_VERSION_MINOR 1)
-set(PROJECT_VERSION_PATCH 0)
-set(ROARING_LIB_VERSION "1.1.0" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 1)
+set(ROARING_LIB_VERSION "1.1.1" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "9" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/doxygen b/doxygen
index 8a1a6e961..9a0c79f17 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "1.1.0"
+PROJECT_NUMBER         = "1.1.1"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 6fb984b02..de87313c9 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "1.1.0"
+#define ROARING_VERSION "1.1.1"
 enum { 
     ROARING_VERSION_MAJOR = 1,
     ROARING_VERSION_MINOR = 1,
-    ROARING_VERSION_REVISION = 0
+    ROARING_VERSION_REVISION = 1
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 14aa2c8c5c9f6597d2cee225641112cc4d235dbe Mon Sep 17 00:00:00 2001
From: Salvatore Previti <roorback@gmail.com>
Date: Tue, 18 Apr 2023 23:56:44 +0100
Subject: [PATCH 116/162] fix the order of #endif for when defined(__x86_64__)
 || defined(_M_AMD64) is false and __cplusplus is true (#464)

---
 src/isadetection.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/isadetection.c b/src/isadetection.c
index 0aefde1fd..06866f4cf 100644
--- a/src/isadetection.c
+++ b/src/isadetection.c
@@ -262,7 +262,7 @@ int croaring_hardware_support() {
 }
 #endif
 
+#endif // defined(__x86_64__) || defined(_M_AMD64) // x64
 #ifdef __cplusplus
 } } }  // extern "C" { namespace roaring { namespace internal {
 #endif
-#endif // defined(__x86_64__) || defined(_M_AMD64) // x64
\ No newline at end of file

From af6d081c0d44ad77ec0269f9d1fc384230831c5a Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 18 Apr 2023 18:57:59 -0400
Subject: [PATCH 117/162] New release

---
 CMakeLists.txt                    | 4 ++--
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a3d1abf0e..16f9cf396 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 1)
 set(PROJECT_VERSION_MINOR 1)
-set(PROJECT_VERSION_PATCH 1)
-set(ROARING_LIB_VERSION "1.1.1" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 2)
+set(ROARING_LIB_VERSION "1.1.2" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "9" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/doxygen b/doxygen
index 9a0c79f17..bdd5f7909 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "1.1.1"
+PROJECT_NUMBER         = "1.1.2"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index de87313c9..87fae5974 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "1.1.1"
+#define ROARING_VERSION "1.1.2"
 enum { 
     ROARING_VERSION_MAJOR = 1,
     ROARING_VERSION_MINOR = 1,
-    ROARING_VERSION_REVISION = 1
+    ROARING_VERSION_REVISION = 2
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 043127d155d8a5edc8de544af0637994412717c6 Mon Sep 17 00:00:00 2001
From: longqimin <long2008920@163.com>
Date: Thu, 20 Apr 2023 21:07:33 +0800
Subject: [PATCH 118/162] ignore dependencies/ (#465)

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 16a15f00d..656ac49c4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 # Downloaded dependencies
 tests/vendor/cmocka
+dependencies
 
 # Object files
 *.o

From 3560883f8c327d16f4477f2986258000c9e52ca7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=84=28Hu=C3=A1ng=29=E7=93=92=28Z=C3=A0n=29?=
 <13297503+pmixer@users.noreply.github.com>
Date: Sat, 29 Apr 2023 05:49:05 +0800
Subject: [PATCH 119/162] typo fix: greater/less then -> greater/less than
 (#466)

* typo fix: greater/less then -> greater/less than

* rm $ from $ikey
---
 include/roaring/array_util.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/roaring/array_util.h b/include/roaring/array_util.h
index ee088bfd3..d9baa2b3b 100644
--- a/include/roaring/array_util.h
+++ b/include/roaring/array_util.h
@@ -95,7 +95,7 @@ static inline int32_t advanceUntil(const uint16_t *array, int32_t pos,
 }
 
 /**
- * Returns number of elements which are less then $ikey.
+ * Returns number of elements which are less than ikey.
  * Array elements must be unique and sorted.
  */
 static inline int32_t count_less(const uint16_t *array, int32_t lenarray,
@@ -106,7 +106,7 @@ static inline int32_t count_less(const uint16_t *array, int32_t lenarray,
 }
 
 /**
- * Returns number of elements which are greater then $ikey.
+ * Returns number of elements which are greater than ikey.
  * Array elements must be unique and sorted.
  */
 static inline int32_t count_greater(const uint16_t *array, int32_t lenarray,

From 885fb21da4850e95e51426a30a9be52058c6baeb Mon Sep 17 00:00:00 2001
From: Gabriela Gutierrez <gabigutierrez@google.com>
Date: Wed, 3 May 2023 16:05:41 +0000
Subject: [PATCH 120/162] Reference actions by commit SHA (#468)

* Ref actions/checkout@v2 by commit SHA

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Ref actions/checkout@v3 by commit SHA

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Ref google/oss-fuzz by commit SHA

Since google/oss-fuzz does not publish releases, we're referencing the commit SHA from the latest commit in master from 2023-05-03. The semantic version comment refers to master so later we can tell where this commit came from.

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Ref actions/upload-artifact@v3 by commit SHA

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Ref github/codeql-action by commit SHA

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Ref peaceiris/actions-gh-pages@v3 by commit SHA

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

* Ref uraimo/run-on-arch-action@v2 by commit SHA

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>

---------

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>
---
 .github/workflows/alpine.yml               | 2 +-
 .github/workflows/cifuzz.yml               | 6 +++---
 .github/workflows/codeql.yml               | 8 ++++----
 .github/workflows/documentation.yml        | 4 ++--
 .github/workflows/macos-ci.yml             | 2 +-
 .github/workflows/s390x.yml                | 4 ++--
 .github/workflows/ubuntu-ci.yml            | 2 +-
 .github/workflows/ubuntu-debug-sani-ci.yml | 2 +-
 .github/workflows/ubuntu-gcc10-ci.yml      | 2 +-
 .github/workflows/ubuntu-legacy-ci.yml     | 2 +-
 .github/workflows/ubuntu-noexcept-ci.yml   | 2 +-
 .github/workflows/ubuntu-sani-ci.yml       | 2 +-
 .github/workflows/vs16-arm-ci.yml          | 2 +-
 .github/workflows/vs16-ci.yml              | 2 +-
 .github/workflows/vs17-arm-ci.yml          | 2 +-
 .github/workflows/vs17-ci.yml              | 2 +-
 .github/workflows/vs17-clang-ci.yml        | 2 +-
 17 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/alpine.yml b/.github/workflows/alpine.yml
index bacf210c3..d01355c78 100644
--- a/.github/workflows/alpine.yml
+++ b/.github/workflows/alpine.yml
@@ -8,7 +8,7 @@ jobs:
   ubuntu-build:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
       - name: start docker
         run: |
           docker run -w /src -dit --name alpine -v $PWD:/src alpine:latest
diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml
index 3539dbf1f..9da72b1b9 100644
--- a/.github/workflows/cifuzz.yml
+++ b/.github/workflows/cifuzz.yml
@@ -8,18 +8,18 @@ jobs:
     steps:
     - name: Build Fuzzers
       id: build
-      uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
+      uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@d318097b285bc695f785b98d40c2d058c0f438b5 # master
       with:
         oss-fuzz-project-name: 'croaring'
         dry-run: false
     - name: Run Fuzzers
-      uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
+      uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@d318097b285bc695f785b98d40c2d058c0f438b5 # master
       with:
         oss-fuzz-project-name: 'croaring'
         fuzz-seconds: 300
         dry-run: false
     - name: Upload Crash
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2
       if: failure() && steps.build.outcome == 'success'
       with:
         name: artifacts
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 8834c90c8..082400aa8 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -27,18 +27,18 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # v3.5.2
 
       - name: Initialize CodeQL
-        uses: github/codeql-action/init@v2
+        uses: github/codeql-action/init@f3feb00acb00f31a6f60280e6ace9ca31d91c76a # v2.3.2
         with:
           languages: ${{ matrix.language }}
 
       - name: Autobuild
-        uses: github/codeql-action/autobuild@v2
+        uses: github/codeql-action/autobuild@f3feb00acb00f31a6f60280e6ace9ca31d91c76a # v2.3.2
         if: ${{ matrix.language == 'cpp' || matrix.language == 'python' }}
 
       - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@v2
+        uses: github/codeql-action/analyze@f3feb00acb00f31a6f60280e6ace9ca31d91c76a # v2.3.2
         with:
           category: "/language:${{ matrix.language }}"
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index 72888ae4f..f88d499ea 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -21,7 +21,7 @@ jobs:
       pages: write
       id-token: write
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # v3.5.2
       - name: Install Doxygen
         run: sudo apt-get install doxygen graphviz -y
       - run: mkdir docs
@@ -30,7 +30,7 @@ jobs:
       - name: Generate Doxygen Documentation
         run: doxygen ./doxygen
       - name: Deploy to GitHub Pages
-        uses: peaceiris/actions-gh-pages@v3
+        uses: peaceiris/actions-gh-pages@373f7f263a76c20808c831209c920827a82a2847 # v3.9.3
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
           publish_dir: docs/html
diff --git a/.github/workflows/macos-ci.yml b/.github/workflows/macos-ci.yml
index 4edb1de29..b1a7067fa 100644
--- a/.github/workflows/macos-ci.yml
+++ b/.github/workflows/macos-ci.yml
@@ -12,7 +12,7 @@ jobs:
     name: macos-llvm
     runs-on: macos-latest
     steps: 
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
       - name: Build and Test
         run: |
           mkdir build
diff --git a/.github/workflows/s390x.yml b/.github/workflows/s390x.yml
index 4822c4ea9..c0045a235 100644
--- a/.github/workflows/s390x.yml
+++ b/.github/workflows/s390x.yml
@@ -15,8 +15,8 @@ jobs:
   build:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: uraimo/run-on-arch-action@v2
+      - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # v3.5.2
+      - uses: uraimo/run-on-arch-action@a8003307a739516fdd80ee6d3da8924db811b8da # v2.5.0
         name: Test
         id: runcmd
         with:
diff --git a/.github/workflows/ubuntu-ci.yml b/.github/workflows/ubuntu-ci.yml
index 247bdf89c..169124f04 100644
--- a/.github/workflows/ubuntu-ci.yml
+++ b/.github/workflows/ubuntu-ci.yml
@@ -17,7 +17,7 @@ jobs:
       CXX: g++
 
     steps: 
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
       - name: Build and Test
         run: |
           mkdir build
diff --git a/.github/workflows/ubuntu-debug-sani-ci.yml b/.github/workflows/ubuntu-debug-sani-ci.yml
index e828b6220..cbda35a0c 100644
--- a/.github/workflows/ubuntu-debug-sani-ci.yml
+++ b/.github/workflows/ubuntu-debug-sani-ci.yml
@@ -17,7 +17,7 @@ jobs:
       CXX: g++
 
     steps: 
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
       - name: Build and Test
         run: |
           mkdir build
diff --git a/.github/workflows/ubuntu-gcc10-ci.yml b/.github/workflows/ubuntu-gcc10-ci.yml
index 1f976ddc7..f46bfb94f 100644
--- a/.github/workflows/ubuntu-gcc10-ci.yml
+++ b/.github/workflows/ubuntu-gcc10-ci.yml
@@ -15,7 +15,7 @@ jobs:
       CC: gcc-10
       CXX: g++-10
     steps: 
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
       - run:   |
          sudo apt update
          sudo apt install gcc-10 g++-10
diff --git a/.github/workflows/ubuntu-legacy-ci.yml b/.github/workflows/ubuntu-legacy-ci.yml
index 940fe08b1..108a49d68 100644
--- a/.github/workflows/ubuntu-legacy-ci.yml
+++ b/.github/workflows/ubuntu-legacy-ci.yml
@@ -13,7 +13,7 @@ jobs:
     runs-on: ubuntu-latest
 
     steps: 
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
       - name: Build and Test
         run: |
           mkdir build
diff --git a/.github/workflows/ubuntu-noexcept-ci.yml b/.github/workflows/ubuntu-noexcept-ci.yml
index bf5dc7385..889d41ed1 100644
--- a/.github/workflows/ubuntu-noexcept-ci.yml
+++ b/.github/workflows/ubuntu-noexcept-ci.yml
@@ -17,7 +17,7 @@ jobs:
       CXX: g++
 
     steps: 
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
       - name: Build and Test
         run: |
           mkdir build
diff --git a/.github/workflows/ubuntu-sani-ci.yml b/.github/workflows/ubuntu-sani-ci.yml
index d3ecaaf9b..c0f8608e3 100644
--- a/.github/workflows/ubuntu-sani-ci.yml
+++ b/.github/workflows/ubuntu-sani-ci.yml
@@ -17,7 +17,7 @@ jobs:
       CXX: g++
 
     steps: 
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
       - name: Build and Test
         run: |
           mkdir build
diff --git a/.github/workflows/vs16-arm-ci.yml b/.github/workflows/vs16-arm-ci.yml
index 95c783242..f68e049af 100644
--- a/.github/workflows/vs16-arm-ci.yml
+++ b/.github/workflows/vs16-arm-ci.yml
@@ -17,7 +17,7 @@ jobs:
           - {gen: Visual Studio 16 2019, arch: ARM64}
     steps:
       - name: checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
       - name: Configure
         run: |
           mkdir build
diff --git a/.github/workflows/vs16-ci.yml b/.github/workflows/vs16-ci.yml
index dfba9833f..5eeb8b6df 100644
--- a/.github/workflows/vs16-ci.yml
+++ b/.github/workflows/vs16-ci.yml
@@ -17,7 +17,7 @@ jobs:
           - {gen: Visual Studio 16 2019, arch: x64}
     steps:
       - name: checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
       - name: Configure
         run: |
           mkdir build
diff --git a/.github/workflows/vs17-arm-ci.yml b/.github/workflows/vs17-arm-ci.yml
index db087ca79..95ff1015c 100644
--- a/.github/workflows/vs17-arm-ci.yml
+++ b/.github/workflows/vs17-arm-ci.yml
@@ -17,7 +17,7 @@ jobs:
           - {arch: ARM64}
     steps:
       - name: checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
       - name: Use cmake
         run: |
           cmake -A ${{ matrix.arch }} -DCMAKE_CROSSCOMPILING=1 -B build  &&
diff --git a/.github/workflows/vs17-ci.yml b/.github/workflows/vs17-ci.yml
index bad4fda01..eaabb130f 100644
--- a/.github/workflows/vs17-ci.yml
+++ b/.github/workflows/vs17-ci.yml
@@ -17,7 +17,7 @@ jobs:
           - {gen: Visual Studio 17 2022, arch: x64}
     steps:
       - name: checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
       - name: Configure
         run: |
           mkdir build
diff --git a/.github/workflows/vs17-clang-ci.yml b/.github/workflows/vs17-clang-ci.yml
index 49c97b46a..871de7eb7 100644
--- a/.github/workflows/vs17-clang-ci.yml
+++ b/.github/workflows/vs17-clang-ci.yml
@@ -17,7 +17,7 @@ jobs:
           - {gen: Visual Studio 17 2022, arch: x64}
     steps:
       - name: checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
       - name: Configure
         run: |
           mkdir build

From 7174265a35926afecde3fed9fd5943bb28a5ec87 Mon Sep 17 00:00:00 2001
From: bryce <bryce@mail.ustc.edu.cn>
Date: Sat, 13 May 2023 00:29:49 +0800
Subject: [PATCH 121/162] add get_index and it will return the index of the
 element (#470)

* add get_idx

* add get_index

* add get_index and it will return the index of the element

* update get_index test

* add getIndex for Roaring64Map and fix some spelling mistakes and function naming

* add getIndex for Roaring64Map and fix some spelling mistakes and function naming

* fix bug and comments

* change int128_t to int64_t

---------

Co-authored-by: zhangdingyuan03 <zhangdingyuan03@kuaishou.com>
---
 cpp/roaring.hh                          | 11 ++++
 cpp/roaring64map.hh                     | 23 +++++++++
 include/roaring/containers/array.h      | 13 ++++-
 include/roaring/containers/bitset.h     |  3 ++
 include/roaring/containers/containers.h | 20 ++++++++
 include/roaring/containers/run.h        |  3 ++
 include/roaring/roaring.h               |  9 ++++
 src/containers/array.c                  |  2 +
 src/containers/bitset.c                 | 19 +++++++
 src/containers/run.c                    | 21 ++++++++
 src/roaring.c                           | 28 +++++++++++
 tests/toplevel_unit.c                   | 67 +++++++++++++++++++++++++
 12 files changed, 218 insertions(+), 1 deletion(-)

diff --git a/cpp/roaring.hh b/cpp/roaring.hh
index cf453ec71..1fd7a4655 100644
--- a/cpp/roaring.hh
+++ b/cpp/roaring.hh
@@ -499,6 +499,17 @@ public:
         return api::roaring_bitmap_rank(&roaring, x);
     }
 
+    /**
+     * Returns the index of x in the set, index start from 0.
+     * If the set doesn't contain x , this function will return -1.
+     * The difference with rank function is that this function will return -1
+     * when x isn't in the set, but the rank function will return a
+     * non-negative number.
+     */
+    int64_t getIndex(uint32_t x) const noexcept {
+        return api::roaring_bitmap_get_index(&roaring, x);
+    }
+
     /**
      * Write a bitmap to a char buffer. This is meant to be compatible with
      * the Java and Go versions. Returns how many bytes were written which
diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index bdc8ba776..0ab1251d0 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -1092,6 +1092,29 @@ public:
         return result;
     }
 
+    /**
+     * Returns the index of x in the set, index start from 0.
+     * If the set doesn't contain x , this function will return -1.
+     * The difference with rank function is that this function will return -1
+     * when x isn't in the set, but the rank function will return a
+     * non-negative number.
+     */
+    int64_t getIndex(uint64_t x) const {
+        int64_t index = 0;
+        auto roaring_destination = roarings.find(highBytes(x));
+        if (roaring_destination != roarings.cend()) {
+            for (auto roaring_iter = roarings.cbegin();
+                 roaring_iter != roaring_destination; ++roaring_iter) {
+                index += roaring_iter->second.cardinality();
+            }
+            auto low_idx = roaring_destination->second.getIndex(lowBytes(x));
+            if (low_idx < 0) return -1;
+            index += low_idx;
+            return index;
+        }
+        return -1;
+    }
+
     /**
      * Write a bitmap to a char buffer. This is meant to be compatible with
      * the Java and Go versions. Returns how many bytes were written which
diff --git a/include/roaring/containers/array.h b/include/roaring/containers/array.h
index 071b0b25f..d06a5fe83 100644
--- a/include/roaring/containers/array.h
+++ b/include/roaring/containers/array.h
@@ -410,7 +410,18 @@ inline int array_container_rank(const array_container_t *arr, uint16_t x) {
     }
 }
 
-/* Returns the index of the first value equal or smaller than x, or -1 */
+/* Returns the index of x , if not exsist return -1 */
+inline int array_container_get_index(const array_container_t *arr, uint16_t x) {
+    const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
+    const bool is_present = idx >= 0;
+    if (is_present) {
+        return idx;
+    } else {
+        return -1;
+    }
+}
+
+/* Returns the index of the first value equal or larger than x, or -1 */
 inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x) {
     const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
     const bool is_present = idx >= 0;
diff --git a/include/roaring/containers/bitset.h b/include/roaring/containers/bitset.h
index 15767a23f..9b8db510d 100644
--- a/include/roaring/containers/bitset.h
+++ b/include/roaring/containers/bitset.h
@@ -481,6 +481,9 @@ uint16_t bitset_container_maximum(const bitset_container_t *container);
 /* Returns the number of values equal or smaller than x */
 int bitset_container_rank(const bitset_container_t *container, uint16_t x);
 
+/* Returns the index of x , if not exsist return -1 */
+int bitset_container_get_index(const bitset_container_t *container, uint16_t x);
+
 /* Returns the index of the first value equal or larger than x, or -1 */
 int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x);
 
diff --git a/include/roaring/containers/containers.h b/include/roaring/containers/containers.h
index b1391b529..d0a97a311 100644
--- a/include/roaring/containers/containers.h
+++ b/include/roaring/containers/containers.h
@@ -2329,6 +2329,26 @@ static inline int container_rank(
     return false;
 }
 
+// return the index of x, if not exsist return -1
+static inline int container_get_index(const container_t *c, uint8_t type,
+                                    uint16_t x) {
+    c = container_unwrap_shared(c, &type);
+    switch (type) {
+        case BITSET_CONTAINER_TYPE:
+            return bitset_container_get_index(const_CAST_bitset(c), x);
+        case ARRAY_CONTAINER_TYPE:
+            return array_container_get_index(const_CAST_array(c), x);
+        case RUN_CONTAINER_TYPE:
+            return run_container_get_index(const_CAST_run(c), x);
+        default:
+            assert(false);
+            roaring_unreachable;
+    }
+    assert(false);
+    roaring_unreachable;
+    return false;
+}
+
 /**
  * Add all values in range [min, max] to a given container.
  *
diff --git a/include/roaring/containers/run.h b/include/roaring/containers/run.h
index 673fde8a0..85deb5767 100644
--- a/include/roaring/containers/run.h
+++ b/include/roaring/containers/run.h
@@ -559,6 +559,9 @@ inline uint16_t run_container_maximum(const run_container_t *run) {
 /* Returns the number of values equal or smaller than x */
 int run_container_rank(const run_container_t *arr, uint16_t x);
 
+/* Returns the index of x, if not exsist return -1 */
+int run_container_get_index(const run_container_t *arr, uint16_t x);
+
 /* Returns the index of the first run containing a value at least as large as x, or -1 */
 inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x) {
     int32_t index = interleavedBinarySearch(arr->runs, arr->n_runs, x);
diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h
index 801825592..9afeddb4d 100644
--- a/include/roaring/roaring.h
+++ b/include/roaring/roaring.h
@@ -799,6 +799,15 @@ bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank,
  */
 uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x);
 
+/**
+ * Returns the index of x in the given roaring bitmap.
+ * If the roaring bitmap doesn't contain x, this function will return -1.
+ * The difference with the rank function is that this function will return -1
+ * when x is not an element of the roaring bitmap, whereas the rank function
+ * will return a non-negative number.
+ */
+int64_t roaring_bitmap_get_index(const roaring_bitmap_t *r, uint32_t x);
+
 /**
  * Returns the smallest value in the set, or UINT32_MAX if the set is empty.
  */
diff --git a/src/containers/array.c b/src/containers/array.c
index 5d4017a7a..4051de714 100644
--- a/src/containers/array.c
+++ b/src/containers/array.c
@@ -25,6 +25,8 @@ extern inline int array_container_index_equalorlarger(const array_container_t *a
 
 extern inline int array_container_rank(const array_container_t *arr,
                                        uint16_t x);
+extern inline int array_container_get_index(const array_container_t *arr,
+                                          uint16_t x);
 extern inline bool array_container_contains(const array_container_t *arr,
                                             uint16_t pos);
 extern inline int array_container_cardinality(const array_container_t *array);
diff --git a/src/containers/bitset.c b/src/containers/bitset.c
index 30352ee48..456d807ce 100644
--- a/src/containers/bitset.c
+++ b/src/containers/bitset.c
@@ -1212,6 +1212,25 @@ int bitset_container_rank(const bitset_container_t *container, uint16_t x) {
   return sum;
 }
 
+/* Returns the index of x, or -1 if x is not present */
+int bitset_container_get_index(const bitset_container_t *container, uint16_t x) {
+  if (bitset_container_get(container, x)) {
+    // credit: aqrit
+    int sum = 0;
+    int i = 0;
+    for (int end = x / 64; i < end; i++){
+      sum += roaring_hamming(container->words[i]);
+    }
+    uint64_t lastword = container->words[i];
+    uint64_t lastpos = UINT64_C(1) << (x % 64);
+    uint64_t mask = lastpos + lastpos - 1; // smear right
+    sum += roaring_hamming(lastword & mask);
+    return sum - 1;
+  } else {
+    return -1;
+  }
+}
+
 /* Returns the index of the first value equal or larger than x, or -1 */
 int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x) {
   uint32_t x32 = x;
diff --git a/src/containers/run.c b/src/containers/run.c
index ed3c6c4f8..31203a64d 100644
--- a/src/containers/run.c
+++ b/src/containers/run.c
@@ -837,6 +837,27 @@ int run_container_rank(const run_container_t *container, uint16_t x) {
     return sum;
 }
 
+int run_container_get_index(const run_container_t *container, uint16_t x) {
+    if (run_container_contains(container, x)) {
+        int sum = 0;
+        uint32_t x32 = x;
+        for (int i = 0; i < container->n_runs; i++) {
+            uint32_t startpoint = container->runs[i].value;
+            uint32_t length = container->runs[i].length;
+            uint32_t endpoint = length + startpoint;
+            if (x <= endpoint) {
+                if (x < startpoint) break;
+                return sum + (x32 - startpoint);
+            } else {
+                sum += length + 1;
+            }
+        }
+        return sum - 1;
+    } else {
+        return -1;
+    }
+}
+
 #if defined(CROARING_IS_X64) && CROARING_COMPILER_SUPPORTS_AVX512
 
 CROARING_TARGET_AVX512
diff --git a/src/roaring.c b/src/roaring.c
index 234356e0f..298be735c 100644
--- a/src/roaring.c
+++ b/src/roaring.c
@@ -2677,6 +2677,34 @@ uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) {
     return size;
 }
 
+/**
+ * roaring_bitmap_get_index returns the index of x, or -1 if x is not present.
+ */
+int64_t roaring_bitmap_get_index(const roaring_bitmap_t *bm, uint32_t x) {
+    int64_t index = 0;
+    const uint16_t xhigh = x >> 16;
+    int32_t high_idx = ra_get_index(&bm->high_low_container, xhigh);
+    if (high_idx < 0) return -1;
+
+    for (int i = 0; i < bm->high_low_container.size; i++) {
+        uint32_t key = bm->high_low_container.keys[i];
+        if (xhigh > key) {
+            index +=
+                container_get_cardinality(bm->high_low_container.containers[i],
+                                          bm->high_low_container.typecodes[i]);
+        } else if (xhigh == key) {
+            int32_t low_idx = container_get_index(
+                bm->high_low_container.containers[high_idx],
+                bm->high_low_container.typecodes[high_idx], x & 0xFFFF);
+            if (low_idx < 0) return -1;
+            return index + low_idx;
+        } else {
+            return -1;
+        }
+    }
+    return index;
+}
+
 /**
 * roaring_bitmap_smallest returns the smallest value in the set.
 * Returns UINT32_MAX if the set is empty.
diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index b30d720ee..e1c15a096 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -3512,6 +3512,15 @@ static uint64_t rank(uint32_t *arr, size_t length, uint32_t x) {
     return sum;
 }
 
+static int64_t get_index(uint32_t *arr, size_t length, uint32_t x) {
+    for (size_t i = 0; i < length; ++i) {
+        if (arr[i] == x) {
+            return i;
+        }
+    }
+    return -1;
+}
+
 DEFINE_TEST(test_rank) {
     for (uint32_t mymin = 123; mymin < 1000000; mymin *= 2) {
         // just arrays
@@ -3569,6 +3578,63 @@ DEFINE_TEST(test_rank) {
     }
 }
 
+DEFINE_TEST(test_get_index) {
+    for (uint32_t mymin = 123; mymin < 1000000; mymin *= 2) {
+        // just arrays
+        roaring_bitmap_t *r = roaring_bitmap_create();
+        uint32_t x = mymin;
+        for (; x < 1000 + mymin; x += 100) {
+            roaring_bitmap_add(r, x);
+        }
+        uint64_t card = roaring_bitmap_get_cardinality(r);
+        uint32_t *ans = (uint32_t *)malloc(card * sizeof(uint32_t));
+        roaring_bitmap_to_uint32_array(r, ans);
+        for (uint32_t z = 0; z < 1000 + mymin + 10; z += 10) {
+            int64_t trueidx = get_index(ans, card, z);
+            int64_t computedidx = roaring_bitmap_get_index(r, z);
+            if (trueidx != computedidx)
+                printf("%d != %d \n", (int)trueidx, (int)computedidx);
+            assert_true(trueidx == computedidx);
+        }
+        free(ans);
+        // now bitmap
+        x = mymin;
+        for (; x < 64000 + mymin; x += 2) {
+            roaring_bitmap_add(r, x);
+        }
+        card = roaring_bitmap_get_cardinality(r);
+        ans = (uint32_t *)malloc(card * sizeof(uint32_t));
+        roaring_bitmap_to_uint32_array(r, ans);
+        for (uint32_t z = 0; z < 64000 + mymin + 10; z += 10) {
+            int64_t trueidx = get_index(ans, card, z);
+            int64_t computedidx = roaring_bitmap_get_index(r, z);
+            if (trueidx != computedidx)
+                printf("%d != %d \n", (int)trueidx, (int)computedidx);
+            assert_true(trueidx == computedidx);
+        }
+        free(ans);
+        // now run
+        x = mymin;
+        for (; x < 64000 + mymin; x++) {
+            roaring_bitmap_add(r, x);
+        }
+        roaring_bitmap_run_optimize(r);
+        card = roaring_bitmap_get_cardinality(r);
+        ans = (uint32_t *)malloc(card * sizeof(uint32_t));
+        roaring_bitmap_to_uint32_array(r, ans);
+        for (uint32_t z = 0; z < 64000 + mymin + 10; z += 10) {
+            int64_t trueidx = get_index(ans, card, z);
+            int64_t computedidx = roaring_bitmap_get_index(r, z);
+            if (trueidx != computedidx)
+                printf("%d != %d \n", (int)trueidx, (int)computedidx);
+            assert_true(trueidx == computedidx);
+        }
+        free(ans);
+
+        roaring_bitmap_free(r);
+    }
+}
+
 // Return a random value which does not belong to the roaring bitmap.
 // Value will be lower than upper_bound.
 uint32_t choose_missing_value(roaring_bitmap_t *rb, uint32_t upper_bound) {
@@ -4491,6 +4557,7 @@ int main() {
         cmocka_unit_test(test_intersect_small_run_bitset),
         cmocka_unit_test(is_really_empty),
         cmocka_unit_test(test_rank),
+        cmocka_unit_test(test_get_index),
         cmocka_unit_test(test_maximum_minimum),
         cmocka_unit_test(test_stats),
         cmocka_unit_test(test_addremove),

From d2bf554494b084d7f3993d01b8c6effc0ff84b16 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Fri, 12 May 2023 15:02:26 -0400
Subject: [PATCH 122/162] Checking OS support for AVX-512 (#477)

* Adding support for AVX-512 on macOS.

* fix

* fix

* Minor fix

* Fixing headers.

* Another fix.

* Checking if the OS supports AVX-512

---------

Co-authored-by: Daniel Lemire <dlemire@lemire.me>
---
 src/isadetection.c | 63 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 48 insertions(+), 15 deletions(-)

diff --git a/src/isadetection.c b/src/isadetection.c
index 06866f4cf..4c0740e4c 100644
--- a/src/isadetection.c
+++ b/src/isadetection.c
@@ -48,7 +48,6 @@ POSSIBILITY OF SUCH DAMAGE.
 #include <stdbool.h>
 #include <stdlib.h>
 
-
 // We need portability.h to be included first, see
 // https://github.com/RoaringBitmap/CRoaring/issues/394
 #include <roaring/portability.h>
@@ -97,7 +96,7 @@ static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
                          uint32_t *edx) {
 #if CROARING_REGULAR_VISUAL_STUDIO
   int cpu_info[4];
-  __cpuid(cpu_info, *eax);
+  __cpuidex(cpu_info, *eax, *ecx);
   *eax = cpu_info[0];
   *ebx = cpu_info[1];
   *ecx = cpu_info[2];
@@ -115,6 +114,17 @@ static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
 #endif
 }
 
+
+static inline uint64_t xgetbv() {
+#if defined(_MSC_VER)
+  return _xgetbv(0);
+#else
+  uint32_t xcr0_lo, xcr0_hi;
+  __asm__("xgetbv\n\t" : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0));
+  return xcr0_lo | ((uint64_t)xcr0_hi << 32);
+#endif
+}
+
 /**
  * This is a relatively expensive function but it will get called at most
  * *once* per compilation units. Normally, the CRoaring library is built
@@ -133,8 +143,39 @@ static inline uint32_t dynamic_croaring_detect_supported_architectures() {
   static uint32_t cpuid_avx512vbmi2_bit = 1 << 6; ///< @private bit 6 of ECX for EAX=0x7
   static uint32_t cpuid_avx512bitalg_bit = 1 << 12; ///< @private bit 12 of ECX for EAX=0x7
   static uint32_t cpuid_avx512vpopcntdq_bit = 1 << 14; ///< @private bit 14 of ECX for EAX=0x7
+  static uint64_t cpuid_avx256_saved = 1 << 2; ///< @private bit 2 = AVX
+  static uint64_t cpuid_avx512_saved = 7 << 5; ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM
   static uint32_t cpuid_sse42_bit = 1 << 20;    ///< @private bit 20 of ECX for EAX=0x1
+  static uint32_t cpuid_osxsave = (1 << 26) | (1 << 27); ///< @private bits 26+27 of ECX for EAX=0x1
   static uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit  1 of ECX for EAX=0x1
+
+
+  // ECX for EAX=0x1
+  eax = 0x1;
+  ecx = 0x0;
+  cpuid(&eax, &ebx, &ecx, &edx);
+
+  if (ecx & cpuid_sse42_bit) {
+    host_isa |= CROARING_SSE42;
+  } else {
+    return host_isa; // everything after is redundant
+  }
+
+  if (ecx & cpuid_pclmulqdq_bit) {
+    host_isa |= CROARING_PCLMULQDQ;
+  }
+
+  if ((ecx & cpuid_osxsave) != cpuid_osxsave) {
+    return host_isa;
+  }
+
+  // xgetbv for checking if the OS saves registers
+  uint64_t xcr0 = xgetbv();
+
+  if ((xcr0 & cpuid_avx256_saved) == 0) {
+    return host_isa;
+  }
+
   // ECX for EAX=0x7
   eax = 0x7;
   ecx = 0x0;
@@ -149,7 +190,11 @@ static inline uint32_t dynamic_croaring_detect_supported_architectures() {
   if (ebx & cpuid_bmi2_bit) {
     host_isa |= CROARING_BMI2;
   }
-  
+
+  if (!((xcr0 & cpuid_avx512_saved) == cpuid_avx512_saved)) {
+     return host_isa;
+  }
+
   if (ebx & cpuid_avx512f_bit) {
     host_isa |= CROARING_AVX512F;
   }
@@ -173,18 +218,6 @@ static inline uint32_t dynamic_croaring_detect_supported_architectures() {
   if (ecx & cpuid_avx512vpopcntdq_bit) {
     host_isa |= CROARING_AVX512VPOPCNTDQ;
   }
-  
-  // EBX for EAX=0x1
-  eax = 0x1;
-  cpuid(&eax, &ebx, &ecx, &edx);
-
-  if (ecx & cpuid_sse42_bit) {
-    host_isa |= CROARING_SSE42;
-  }
-
-  if (ecx & cpuid_pclmulqdq_bit) {
-    host_isa |= CROARING_PCLMULQDQ;
-  }
 
   return host_isa;
 }

From 264b18882fa7124f2abae959697cdd03dd6584ec Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 15 May 2023 22:37:18 -0400
Subject: [PATCH 123/162] Using atomic counters on shared containers (#473)

* Using atomic counters on shared containers

* Getting around VS limitations.

* Adding thread sanitizer CI

* Adding tests

* Removing unnecessary code

* Adding tests for data races

* Various fixes

* Patching the counter.

* fixing silly leak

* Visual Studio does not support C11 atomics.

* adding vs atomics (#474)

* Using Windows intrinsics.

* Some fixes.

* Removing silly comment.

* Use typedef/inline functions to centralize atomic ref count ops (#475)

* Use typedef/inline functions to centralize atomic ref count ops

* __STDC_NO_ATOMICS__ only matters if we're c11+

I _think_ this will fix all the msvc compile issues

* MSVC can probably use Interlocked functions even in c++

* _Interlocked ops actually take a signed argument

* Ack. uint32->uint32_t

Probably was rust brain, thinking `u32`

* Minor fixes

---------

Co-authored-by: Daniel Lemire <dlemire@lemire.me>
Co-authored-by: Zachary Dremann <dremann@gmail.com>
---
 .github/workflows/ubuntu-sani-thread-ci.yml   |  27 ++++
 CMakeLists.txt                                |   2 +
 README.md                                     |   9 ++
 include/roaring/containers/containers.h       |   3 +-
 include/roaring/portability.h                 | 140 +++++++++++++++++-
 .../performancecounters/apple_arm_events.h    |   6 +-
 src/containers/containers.c                   |  15 +-
 src/isadetection.c                            |  23 ++-
 src/roaring.c                                 |   6 +-
 tests/CMakeLists.txt                          |  18 +++
 tests/threads_unit.cpp                        |  64 ++++++++
 11 files changed, 279 insertions(+), 34 deletions(-)
 create mode 100644 .github/workflows/ubuntu-sani-thread-ci.yml
 create mode 100644 tests/threads_unit.cpp

diff --git a/.github/workflows/ubuntu-sani-thread-ci.yml b/.github/workflows/ubuntu-sani-thread-ci.yml
new file mode 100644
index 000000000..067644aea
--- /dev/null
+++ b/.github/workflows/ubuntu-sani-thread-ci.yml
@@ -0,0 +1,27 @@
+name: Ubuntu-Sanitized-CI
+
+'on':
+  - push
+  - pull_request
+
+permissions:
+  contents: read
+
+jobs:
+  ci:
+    name: ubuntu-gcc
+    runs-on: ubuntu-latest
+
+    env:
+      CC: gcc
+      CXX: g++
+
+    steps: 
+      - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0
+      - name: Build and Test
+        run: |
+          mkdir build
+          cd build
+          cmake  -DROARING_SANITIZE_THREADS=ON ..
+          cmake --build . 
+          ctest . --output-on-failure
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 16f9cf396..e52be498c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,6 +44,8 @@ option(ROARING_BUILD_LTO "Build library with Link Time Optimization" OFF)
 option(ROARING_BUILD_C_AS_CPP "Build library C files using C++ compilation" OFF)
 option(ROARING_BUILD_C_TESTS_AS_CPP "Build test C files using C++ compilation" OFF)
 option(ROARING_SANITIZE "Sanitize addresses" OFF)
+option(ROARING_SANITIZE_THREADS "Sanitize threads" OFF)
+
 option(ENABLE_ROARING_TESTS "If OFF, disable unit tests altogether" ON)
 
 set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/tools/cmake")
diff --git a/README.md b/README.md
index 26cef1d6e..7db6b0335 100644
--- a/README.md
+++ b/README.md
@@ -698,6 +698,15 @@ Our AVX-512 code is only enabled on recent hardware (Intel Ice Lake or better an
 
 Like, for example, STL containers or Java's default data structures, the CRoaring library has no built-in thread support. Thus whenever you modify a bitmap in one thread, it is unsafe to query it in others. It is safe however to query bitmaps (without modifying them) from several distinct threads,  as long as you do not use the copy-on-write attribute. For example, you can safely copy a bitmap and use both copies in concurrently. One should probably avoid the use of the copy-on-write attribute in a threaded environment.
 
+Some of our users rely on "copy-on-write" (disabled by default). A bitmap with the copy-on-write flag
+set to true might generate shared containers. A shared container is just a reference to a single
+container with reference counting (we keep track of the number of shallow copies). If you copy shared
+containers over several threads, this might be unsafe due to the need to update the counter concurrently.
+Thus for shared containers, we use reference counting with an atomic counter. If the library is compiled
+as a C library (the default), we use C11 atomics. Unfortunately, Visual Studio does not support C11
+atomics at this time (though this is subject to change). To compensate, we
+use Windows-specific code in such instances (`_InterlockedDecrement` and `_InterlockedIncrement`).
+
 
 # How to best aggregate bitmaps?
 
diff --git a/include/roaring/containers/containers.h b/include/roaring/containers/containers.h
index d0a97a311..3588fc49e 100644
--- a/include/roaring/containers/containers.h
+++ b/include/roaring/containers/containers.h
@@ -55,11 +55,10 @@ extern "C" { namespace roaring { namespace internal {
  * A shared container is a wrapper around a container
  * with reference counting.
  */
-
 STRUCT_CONTAINER(shared_container_s) {
     container_t *container;
     uint8_t typecode;
-    uint32_t counter;  // to be managed atomically
+    croaring_refcount_t counter;  // to be managed atomically
 };
 
 typedef struct shared_container_s shared_container_t;
diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index f05206272..1dd6b04b1 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -38,6 +38,15 @@
 #define CROARING_REGULAR_VISUAL_STUDIO 1
 #endif // __clang__
 #endif // _MSC_VER
+#ifndef CROARING_VISUAL_STUDIO
+#define CROARING_VISUAL_STUDIO 0
+#endif
+#ifndef CROARING_CLANG_VISUAL_STUDIO
+#define CROARING_CLANG_VISUAL_STUDIO 0
+#endif
+#ifndef CROARING_REGULAR_VISUAL_STUDIO
+#define CROARING_REGULAR_VISUAL_STUDIO 0
+#endif
 
 #if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L)
 #undef _POSIX_C_SOURCE
@@ -61,11 +70,6 @@
 extern "C" {  // portability definitions are in global scope, not a namespace
 #endif
 
-#if CROARING_REGULAR_VISUAL_STUDIO && !defined(_WIN64) && !defined(CROARING_ACK_32BIT)
-#pragma message( \
-    "You appear to be attempting a 32-bit build under Visual Studio. We recommend a 64-bit build instead.")
-#endif
-
 #if defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ != 8
 #error This code assumes  64-bit long longs (by use of the GCC intrinsics). Your system is not currently supported.
 #endif
@@ -366,7 +370,7 @@ static inline int roaring_hamming(uint64_t x) {
 
 #if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
  #define CROARING_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
- #elif defined(_WIN32)
+#elif defined(_WIN32)
  #define CROARING_IS_BIG_ENDIAN 0
  #else
  #if defined(__APPLE__) || defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
@@ -395,6 +399,130 @@ static inline int roaring_hamming(uint64_t x) {
  #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 #endif
 
+// Defines for the possible CROARING atomic implementations
+#define CROARING_ATOMIC_IMPL_NONE          1
+#define CROARING_ATOMIC_IMPL_CPP           2
+#define CROARING_ATOMIC_IMPL_C             3
+#define CROARING_ATOMIC_IMPL_C_WINDOWS     4
+
+// If the user has forced a specific implementation, use that, otherwise,
+// figure out the best implementation we can use.
+#if !defined(CROARING_ATOMIC_IMPL)
+  #if defined(__cplusplus) && __cplusplus >= 201103L
+    #ifdef __has_include
+      #if __has_include(<atomic>)
+        #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP
+      #endif //__has_include(<atomic>)
+    #else
+      // We lack __has_include to check:
+      #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP
+    #endif //__has_include
+  #elif __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__)
+    #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C
+  #elif CROARING_REGULAR_VISUAL_STUDIO
+    // https://www.technetworkhub.com/c11-atomics-in-visual-studio-2022-version-17/
+    #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C_WINDOWS
+  #endif
+#endif // !defined(CROARING_ATOMIC_IMPL)
+
+#if !defined(CROARING_ATOMIC_IMPL)
+  #pragma message ( "No atomic implementation found, copy on write bitmaps will not be threadsafe" )
+  #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_NONE
+#endif
+
+#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C
+#include <stdatomic.h>
+typedef _Atomic(uint32_t) croaring_refcount_t;
+
+static inline void croaring_refcount_inc(croaring_refcount_t *val) {
+    // Increasing the reference counter can always be done with
+    // memory_order_relaxed: New references to an object can only be formed from
+    // an existing reference, and passing an existing reference from one thread to
+    // another must already provide any required synchronization.
+    atomic_fetch_add_explicit(val, 1, memory_order_relaxed);
+}
+
+static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
+    // It is important to enforce any possible access to the object in one thread
+    // (through an existing reference) to happen before deleting the object in a
+    // different thread. This is achieved by a "release" operation after dropping
+    // a reference (any access to the object through this reference must obviously
+    // happened before), and an "acquire" operation before deleting the object.
+    bool is_zero = atomic_fetch_sub_explicit(val, 1, memory_order_release) == 1;
+    if (is_zero) {
+        atomic_thread_fence(memory_order_acquire);
+    }
+    return is_zero;
+}
+
+static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) {
+    return atomic_load_explicit(val, memory_order_relaxed);
+}
+#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP
+#include <atomic>
+typedef std::atomic<uint32_t> croaring_refcount_t;
+
+static inline void croaring_refcount_inc(croaring_refcount_t *val) {
+    val->fetch_add(1, std::memory_order_relaxed);
+}
+
+static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
+    // See above comments on the c11 atomic implementation for memory ordering
+    bool is_zero = val->fetch_sub(1, std::memory_order_release) == 1;
+    if (is_zero) {
+        std::atomic_thread_fence(std::memory_order_acquire);
+    }
+    return is_zero;
+}
+
+static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) {
+    return val->load(std::memory_order_relaxed);
+}
+#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C_WINDOWS
+#include <intrin.h>
+#pragma intrinsic(_InterlockedIncrement)
+#pragma intrinsic(_InterlockedDecrement)
+
+// _InterlockedIncrement and _InterlockedDecrement take a (signed) long, and
+// overflow is defined to wrap, so we can pretend it is a uint32_t for our case
+typedef volatile long croaring_refcount_t;
+
+static inline void croaring_refcount_inc(croaring_refcount_t *val) {
+    _InterlockedIncrement(val);
+}
+
+static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
+    return _InterlockedDecrement(val) == 0;
+}
+
+static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) {
+    // Per https://learn.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access
+    // > Simple reads and writes to properly-aligned 32-bit variables are atomic
+    // > operations. In other words, you will not end up with only one portion
+    // > of the variable updated; all bits are updated in an atomic fashion.
+    return *val;
+}
+#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_NONE
+typedef uint32_t croaring_refcount_t;
+
+static inline void croaring_refcount_inc(croaring_refcount_t *val) {
+    *val += 1;
+}
+
+static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
+    assert(*val > 0);  // decrementing a zero count would wrap around
+    *val -= 1;
+    return *val == 0;  // true when the last reference is gone (test the count, not the pointer)
+}
+
+static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) {
+    return *val;
+}
+#else
+#error "Unknown atomic implementation"
+#endif
+
+
 // We need portability.h to be included first,
 // but we also always want isadetection.h to be
 // included (right after).
diff --git a/microbenchmarks/performancecounters/apple_arm_events.h b/microbenchmarks/performancecounters/apple_arm_events.h
index 5ce147ee2..c9eeb8aea 100644
--- a/microbenchmarks/performancecounters/apple_arm_events.h
+++ b/microbenchmarks/performancecounters/apple_arm_events.h
@@ -874,11 +874,11 @@ struct AppleEvents {
   u64 counters_1[KPC_MAX_COUNTERS] = {0};
   static constexpr usize ev_count =
       sizeof(profile_events) / sizeof(profile_events[0]);
+  bool init = false;
+  bool worked = false;
 
-  inline bool setup_performance_counters() {
-    static bool init = false;
-    static bool worked = false;
 
+  inline bool setup_performance_counters() {
     if (init) {
       return worked;
     }
diff --git a/src/containers/containers.c b/src/containers/containers.c
index c2fd32942..9bbd758c8 100644
--- a/src/containers/containers.c
+++ b/src/containers/containers.c
@@ -137,7 +137,7 @@ container_t *get_copy_of_container(
         shared_container_t *shared_container;
         if (*typecode == SHARED_CONTAINER_TYPE) {
             shared_container = CAST_shared(c);
-            shared_container->counter += 1;
+            croaring_refcount_inc(&shared_container->counter);
             return shared_container;
         }
         assert(*typecode != SHARED_CONTAINER_TYPE);
@@ -149,7 +149,10 @@ container_t *get_copy_of_container(
 
         shared_container->container = c;
         shared_container->typecode = *typecode;
-
+        // At this point, we are creating new shared container
+        // so there should be no other references, and setting
+        // the counter to 2 - even non-atomically - is safe as
+        // long as the value is set before the return statement.
         shared_container->counter = 2;
         *typecode = SHARED_CONTAINER_TYPE;
 
@@ -188,12 +191,10 @@ container_t *container_clone(const container_t *c, uint8_t typecode) {
 container_t *shared_container_extract_copy(
     shared_container_t *sc, uint8_t *typecode
 ){
-    assert(sc->counter > 0);
     assert(sc->typecode != SHARED_CONTAINER_TYPE);
-    sc->counter--;
     *typecode = sc->typecode;
     container_t *answer;
-    if (sc->counter == 0) {
+    if (croaring_refcount_dec(&sc->counter)) {
         answer = sc->container;
         sc->container = NULL;  // paranoid
         roaring_free(sc);
@@ -205,9 +206,7 @@ container_t *shared_container_extract_copy(
 }
 
 void shared_container_free(shared_container_t *container) {
-    assert(container->counter > 0);
-    container->counter--;
-    if (container->counter == 0) {
+    if (croaring_refcount_dec(&container->counter)) {
         assert(container->typecode != SHARED_CONTAINER_TYPE);
         container_free(container->container, container->typecode);
         container->container = NULL;  // paranoid
diff --git a/src/isadetection.c b/src/isadetection.c
index 4c0740e4c..cd56b6983 100644
--- a/src/isadetection.c
+++ b/src/isadetection.c
@@ -227,33 +227,32 @@ static inline uint32_t dynamic_croaring_detect_supported_architectures() {
 
 #if defined(__x86_64__) || defined(_M_AMD64) // x64
 
-#if defined(__cplusplus)
+#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP
 static inline uint32_t croaring_detect_supported_architectures() {
     // thread-safe as per the C++11 standard.
     static uint32_t buffer = dynamic_croaring_detect_supported_architectures();
     return buffer;
 }
-#elif CROARING_VISUAL_STUDIO
-// Visual Studio does not support C11 atomics.
-static inline uint32_t croaring_detect_supported_architectures() {
-    static int buffer = CROARING_UNINITIALIZED;
+#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C
+static uint32_t croaring_detect_supported_architectures() {
+    // we use an atomic for thread safety
+    static _Atomic uint32_t buffer = CROARING_UNINITIALIZED;
     if (buffer == CROARING_UNINITIALIZED) {
+      // atomicity is sufficient
       buffer = dynamic_croaring_detect_supported_architectures();
     }
     return buffer;
 }
-#else // CROARING_VISUAL_STUDIO
-#include <stdatomic.h>
-uint32_t croaring_detect_supported_architectures() {
-    // we use an atomic for thread safety
-    static _Atomic uint32_t buffer = CROARING_UNINITIALIZED;
+#else
+// If we do not have atomics, we do the best we can.
+static inline uint32_t croaring_detect_supported_architectures() {
+    static uint32_t buffer = CROARING_UNINITIALIZED;
     if (buffer == CROARING_UNINITIALIZED) {
-      // atomicity is sufficient
       buffer = dynamic_croaring_detect_supported_architectures();
     }
     return buffer;
 }
-#endif // CROARING_REGULAR_VISUAL_STUDIO
+#endif // CROARING_ATOMIC_IMPL
 
 #ifdef ROARING_DISABLE_AVX
 
diff --git a/src/roaring.c b/src/roaring.c
index 298be735c..4bdb94fa7 100644
--- a/src/roaring.c
+++ b/src/roaring.c
@@ -348,9 +348,9 @@ void roaring_bitmap_printf_describe(const roaring_bitmap_t *r) {
                get_full_container_name(ra->containers[i], ra->typecodes[i]),
                container_get_cardinality(ra->containers[i], ra->typecodes[i]));
         if (ra->typecodes[i] == SHARED_CONTAINER_TYPE) {
-            printf(
-                "(shared count = %" PRIu32 " )",
-                    CAST_shared(ra->containers[i])->counter);
+            printf("(shared count = %" PRIu32 " )",
+                   croaring_refcount_get(
+                       &(CAST_shared(ra->containers[i])->counter)));
         }
 
         if (i + 1 < ra->size) {
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 28177c01a..d7fd398ba 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -20,6 +20,24 @@ add_c_test(format_portability_unit)
 add_c_test(robust_deserialization_unit)
 add_c_test(container_comparison_unit)
 add_c_test(add_offset)
+find_package(Threads)
+if(Threads_FOUND)
+  message(STATUS "Your system supports threads.")
+  add_executable(threads_unit threads_unit.cpp)
+  target_link_libraries(threads_unit PRIVATE roaring Threads::Threads)
+  if(ROARING_SANITIZE_THREADS)
+    # libtsan might be needed
+    if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+      message(STATUS "Under Linux, you may need to install libtsan." )
+    endif()
+    target_compile_options(threads_unit PRIVATE -fsanitize=thread -fno-sanitize-recover=all)
+    target_link_options(threads_unit PRIVATE -fsanitize=thread -fno-sanitize-recover=all)
+    message(STATUS "Sanitizing threads.")
+  endif()
+  add_test(threads_unit threads_unit)
+else(Threads_FOUND)
+  message(STATUS "Your system does not support threads.")
+endif(Threads_FOUND)
 
 if (NOT WIN32)
 # We exclude POSIX tests from Microsoft Windows
diff --git a/tests/threads_unit.cpp b/tests/threads_unit.cpp
new file mode 100644
index 000000000..2d913889c
--- /dev/null
+++ b/tests/threads_unit.cpp
@@ -0,0 +1,64 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <thread>
+#include <roaring/roaring.h>
+#include <roaring/misc/configreport.h>
+
+// We are mostly running this test to check for data races using thread sanitizer.
+void run(roaring_bitmap_t **rarray) {
+    for(size_t i = 0; i < 100; i++) {
+      roaring_bitmap_t *r1 = roaring_bitmap_copy(rarray[0]);
+      roaring_bitmap_t *r2 = roaring_bitmap_copy(rarray[1]);
+      roaring_bitmap_t *r3 = roaring_bitmap_copy(rarray[2]);
+      roaring_bitmap_and_inplace(r1, r2);
+      roaring_bitmap_andnot_inplace(r1, r3);
+      roaring_bitmap_free(r1);
+      roaring_bitmap_free(r2);
+      roaring_bitmap_free(r3);
+    }
+}
+
+bool run_threads_unit_tests() {
+    roaring_bitmap_t *r1 = roaring_bitmap_create();
+
+    for (uint32_t i = 0; i < 50000; i++) {
+        if (i != 300) {
+            roaring_bitmap_add(r1, 65536 + i);
+        }
+    }
+    for (uint32_t i = 50000; i < 150000; i++) {
+        if ((i%500) == 0) {
+            roaring_bitmap_add(r1, i);
+        }
+    }
+    for (uint32_t i = 150000; i < 200000; i++) {
+        if ((i%2) == 0) {
+            roaring_bitmap_add(r1, i);
+        }
+    }
+    
+    roaring_bitmap_set_copy_on_write(r1, true);
+    roaring_bitmap_run_optimize(r1);
+    roaring_bitmap_t *r2 = roaring_bitmap_of(5, 10010,10020,10030,10040,10050);
+    roaring_bitmap_set_copy_on_write(r2, true);
+    roaring_bitmap_t *r3 = roaring_bitmap_copy(r1);
+    roaring_bitmap_set_copy_on_write(r3, true);
+
+    roaring_bitmap_t* rarray1[3] = {r1, r2, r3};
+    roaring_bitmap_t* rarray2[3] = {r1, r2, r3};
+    std::thread thread1(run,rarray1);
+    std::thread thread2(run,rarray2);
+    thread1.join();
+    thread2.join();
+    roaring_bitmap_free(r1);
+    roaring_bitmap_free(r2);
+    roaring_bitmap_free(r3);
+    return true;
+}
+
+int main() {
+    roaring::misc::tellmeall();
+    bool is_ok = run_threads_unit_tests();
+    if(is_ok) { printf("code run completed.\n"); }
+    return is_ok ? EXIT_SUCCESS : EXIT_FAILURE;
+}

From 5d6dd2342d9e3ffaf481aa5ebe344e19984faa4a Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 15 May 2023 22:38:46 -0400
Subject: [PATCH 124/162] Version bump.

---
 CMakeLists.txt                    | 8 ++++----
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 6 +++---
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e52be498c..57f4833aa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,10 +17,10 @@ if(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSIO
 endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 1)
-set(PROJECT_VERSION_MINOR 1)
-set(PROJECT_VERSION_PATCH 2)
-set(ROARING_LIB_VERSION "1.1.2" CACHE STRING "Roaring library version")
-set(ROARING_LIB_SOVERSION "9" CACHE STRING "Roaring library soversion")
+set(PROJECT_VERSION_MINOR 2)
+set(PROJECT_VERSION_PATCH 0)
+set(ROARING_LIB_VERSION "1.2.0" CACHE STRING "Roaring library version")
+set(ROARING_LIB_SOVERSION "10" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
 if(NOT ROARING_EXCEPTIONS)
diff --git a/doxygen b/doxygen
index bdd5f7909..544adf94c 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "1.1.2"
+PROJECT_NUMBER         = "1.2.0"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 87fae5974..3b52edb5c 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "1.1.2"
+#define ROARING_VERSION "1.2.0"
 enum { 
     ROARING_VERSION_MAJOR = 1,
-    ROARING_VERSION_MINOR = 1,
-    ROARING_VERSION_REVISION = 2
+    ROARING_VERSION_MINOR = 2,
+    ROARING_VERSION_REVISION = 0
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 96d550d0e2e8d725f691674e181ddff5ac818ab6 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Wed, 17 May 2023 11:21:49 -0400
Subject: [PATCH 125/162] Additional tests related to issue 476 (#479)

Co-authored-by: Daniel Lemire <dlemire@lemire.me>
---
 tests/array_container_unit.c | 183 +++++++++++++++++++++++++++++++++++
 1 file changed, 183 insertions(+)

diff --git a/tests/array_container_unit.c b/tests/array_container_unit.c
index 26f1b57a0..09bb6d03a 100644
--- a/tests/array_container_unit.c
+++ b/tests/array_container_unit.c
@@ -8,6 +8,8 @@
 #include <stdlib.h>
 
 #include <roaring/containers/array.h>
+#include <roaring/containers/mixed_equal.h>
+#include <roaring/containers/bitset.h>
 #include <roaring/misc/configreport.h>
 
 #ifdef __cplusplus  // stronger type checking errors if C built in C++ mode
@@ -192,8 +194,189 @@ DEFINE_TEST(capacity_test) {
     array_container_free(array);
 }
 
+
+/* This is a fixed-increment version of Java 8's SplittableRandom generator
+   See http://dx.doi.org/10.1145/2714064.2660195 and
+   http://docs.oracle.com/javase/8/docs/api/java/util/SplittableRandom.html */
+
+// state for splitmix64
+uint64_t splitmix64_x; /* The state can be seeded with any value. */
+
+// call this one before calling splitmix64
+static inline void splitmix64_seed(uint64_t seed) { splitmix64_x = seed; }
+
+// floor( ( (1+sqrt(5))/2 ) * 2**64 MOD 2**64)
+#define GOLDEN_GAMMA UINT64_C(0x9E3779B97F4A7C15)
+
+// returns random number, modifies seed[0]
+// compared with D. Lemire against
+// http://grepcode.com/file/repository.grepcode.com/java/root/jdk/openjdk/8-b132/java/util/SplittableRandom.java#SplittableRandom.0gamma
+static inline uint64_t splitmix64_r(uint64_t *seed) {
+  uint64_t z = (*seed += GOLDEN_GAMMA);
+  // David Stafford's Mix13 for MurmurHash3's 64-bit finalizer
+  z = (z ^ (z >> 30)) * UINT64_C(0xBF58476D1CE4E5B9);
+  z = (z ^ (z >> 27)) * UINT64_C(0x94D049BB133111EB);
+  return z ^ (z >> 31);
+}
+
+static inline uint64_t splitmix64() {
+    return splitmix64_r(&splitmix64_x);
+}
+
+size_t populate(uint16_t* buffer, size_t maxsize) {
+    size_t length = splitmix64() % maxsize;
+    for(size_t i = 0; i < length; i++) {
+        buffer[i] = (uint16_t)splitmix64();
+    }
+    return length;
+}
+
+DEFINE_TEST(mini_fuzz_array_container_intersection_inplace) {
+    splitmix64_seed(12345);
+    uint16_t* buffer1 = (uint16_t*) malloc(DEFAULT_MAX_SIZE * sizeof(uint16_t));
+    uint16_t* buffer2 = (uint16_t*) malloc(DEFAULT_MAX_SIZE * sizeof(uint16_t));
+    uint16_t* buffer3 = (uint16_t*) malloc(DEFAULT_MAX_SIZE * sizeof(uint16_t));
+    for(size_t z = 0; z < 3000; z++) {
+        array_container_t* array1 = array_container_create();
+        array_container_t* array2 = array_container_create();
+        array_container_t* array3 = array_container_create();
+
+        bitset_container_t* bitset1 = bitset_container_create();
+        bitset_container_t* bitset2 = bitset_container_create();
+        bitset_container_t* bitset3 = bitset_container_create();
+        size_t l1 = populate(buffer1, DEFAULT_MAX_SIZE);
+        size_t l2 = populate(buffer2, DEFAULT_MAX_SIZE);
+        size_t l3 = populate(buffer3, DEFAULT_MAX_SIZE);
+
+        for (uint32_t i = 0; i < l1; i++) {
+            array_container_add(array1, buffer1[i]);
+            bitset_container_set(bitset1, buffer1[i]);
+        }
+        for (uint32_t i = 0; i < l2; i++) {
+            array_container_add(array2, buffer2[i]);
+            bitset_container_set(bitset2, buffer2[i]);
+        }
+        for (uint32_t i = 0; i < l3; i++) {
+            array_container_add(array3, buffer3[i]);
+            bitset_container_set(bitset3, buffer3[i]);
+
+        }
+        bitset1->cardinality = BITSET_UNKNOWN_CARDINALITY;
+
+        array_container_intersection_inplace(array1, array2);
+        bitset_container_and_nocard(bitset1, bitset2, bitset1);
+        assert_true(array_container_equal_bitset(array1, bitset1));
+
+        array_container_intersection_inplace(array1, array3);
+        bitset_container_and_nocard(bitset1, bitset3, bitset1);
+        assert_true(array_container_equal_bitset(array1, bitset1));
+
+        for (uint32_t i = 0; i < l1; i++) {
+            array_container_add(array1, buffer1[i]);
+            bitset_container_set(bitset1, buffer1[i]);
+        }
+        bitset1->cardinality = BITSET_UNKNOWN_CARDINALITY;
+        assert_true(array_container_equal_bitset(array1, bitset1));
+
+        array_container_intersection_inplace(array1, array2);
+        bitset_container_and_nocard(bitset1, bitset2, bitset1);
+        assert_true(array_container_equal_bitset(array1, bitset1));
+
+        array_container_intersection_inplace(array1, array3);
+        bitset_container_and_nocard(bitset1, bitset3, bitset1);
+        assert_true(array_container_equal_bitset(array1, bitset1));
+        array_container_free(array1);
+        array_container_free(array2);
+        array_container_free(array3);
+        bitset_container_free(bitset1);
+        bitset_container_free(bitset2);
+        bitset_container_free(bitset3);
+    }
+    free(buffer1);
+    free(buffer2);
+    free(buffer3);
+}
+
+
+
+DEFINE_TEST(mini_fuzz_recycle_array_container_intersection_inplace) {
+    splitmix64_seed(12345);
+    uint16_t* buffer1 = (uint16_t*) malloc(DEFAULT_MAX_SIZE * sizeof(uint16_t));
+    uint16_t* buffer2 = (uint16_t*) malloc(DEFAULT_MAX_SIZE * sizeof(uint16_t));
+    uint16_t* buffer3 = (uint16_t*) malloc(DEFAULT_MAX_SIZE * sizeof(uint16_t));
+    array_container_t* array1 = array_container_create();
+    array_container_t* array2 = array_container_create();
+    array_container_t* array3 = array_container_create();
+
+    bitset_container_t* bitset1 = bitset_container_create();
+    bitset_container_t* bitset2 = bitset_container_create();
+    bitset_container_t* bitset3 = bitset_container_create();
+    for(size_t z = 0; z < 3000; z++) {
+        bitset_container_clear(bitset1);
+        bitset_container_clear(bitset2);
+        bitset_container_clear(bitset3);
+        array1->cardinality = 0;
+        array2->cardinality = 0;
+        array3->cardinality = 0;
+        size_t l1 = populate(buffer1, DEFAULT_MAX_SIZE);
+        size_t l2 = populate(buffer2, DEFAULT_MAX_SIZE);
+        size_t l3 = populate(buffer3, DEFAULT_MAX_SIZE);
+
+        for (uint32_t i = 0; i < l1; i++) {
+            array_container_add(array1, buffer1[i]);
+            bitset_container_set(bitset1, buffer1[i]);
+        }
+        for (uint32_t i = 0; i < l2; i++) {
+            array_container_add(array2, buffer2[i]);
+            bitset_container_set(bitset2, buffer2[i]);
+        }
+        for (uint32_t i = 0; i < l3; i++) {
+            array_container_add(array3, buffer3[i]);
+            bitset_container_set(bitset3, buffer3[i]);
+
+        }
+        bitset1->cardinality = BITSET_UNKNOWN_CARDINALITY;
+
+        array_container_intersection_inplace(array1, array2);
+        bitset_container_and_nocard(bitset1, bitset2, bitset1);
+        assert_true(array_container_equal_bitset(array1, bitset1));
+
+        array_container_intersection_inplace(array1, array3);
+        bitset_container_and_nocard(bitset1, bitset3, bitset1);
+        assert_true(array_container_equal_bitset(array1, bitset1));
+
+        for (uint32_t i = 0; i < l1; i++) {
+            array_container_add(array1, buffer1[i]);
+            bitset_container_set(bitset1, buffer1[i]);
+        }
+        bitset1->cardinality = BITSET_UNKNOWN_CARDINALITY;
+        assert_true(array_container_equal_bitset(array1, bitset1));
+
+        array_container_intersection_inplace(array1, array2);
+        bitset_container_and_nocard(bitset1, bitset2, bitset1);
+        assert_true(array_container_equal_bitset(array1, bitset1));
+
+        array_container_intersection_inplace(array1, array3);
+        bitset_container_and_nocard(bitset1, bitset3, bitset1);
+        assert_true(array_container_equal_bitset(array1, bitset1));
+
+    }
+    array_container_free(array1);
+    array_container_free(array2);
+    array_container_free(array3);
+    bitset_container_free(bitset1);
+    bitset_container_free(bitset2);
+    bitset_container_free(bitset3);
+
+    free(buffer1);
+    free(buffer2);
+    free(buffer3);
+}
+
 int main() {
     const struct CMUnitTest tests[] = {
+        cmocka_unit_test(mini_fuzz_array_container_intersection_inplace),
+        cmocka_unit_test(mini_fuzz_recycle_array_container_intersection_inplace),
         cmocka_unit_test(printf_test), cmocka_unit_test(add_contains_test),
         cmocka_unit_test(and_or_test), cmocka_unit_test(to_uint32_array_test),
         cmocka_unit_test(select_test),

From 070f5ac7dc5f6c134f6b814a50676247c96aa14f Mon Sep 17 00:00:00 2001
From: Chen Tianjie <chentianjie.ctj@alibaba-inc.com>
Date: Wed, 17 May 2023 23:31:31 +0800
Subject: [PATCH 126/162] Correct macro ROARING_DISABLE_X64. (#478)

---
 include/roaring/portability.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index 1dd6b04b1..cfaaf143f 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -97,7 +97,7 @@ extern "C" {  // portability definitions are in global scope, not a namespace
 #undef CROARING_IS_X64
 #endif
 
-#ifdef CROARING_DISABLE_X64
+#ifdef ROARING_DISABLE_X64
 #undef CROARING_IS_X64
 #endif
 // we include the intrinsic header

From 6db7fbc7a3cc888f0a9657e31e536d9558d55057 Mon Sep 17 00:00:00 2001
From: Taylor Blau <ttaylorr@github.com>
Date: Mon, 22 May 2023 21:38:42 -0500
Subject: [PATCH 127/162] Fix a handful of compilation issues with older C
 standards (#480)

* portability.h: include missing `<assert.h>`

The assertion added via 264b188 (Using atomic counters on shared
containers (#473), 2023-05-15) does not explicitly include <assert.h>
causing GCC to fail compilation when invoked with
`-Werror=implicit-function-declaration`.

In C++ this is OK, since there `assert()` refers to its macro
definition. But in C the only way to get `assert()` is by including
<assert.h>.

Signed-off-by: Taylor Blau <me@ttaylorr.com>

* src: avoid old-style function declarations

A handful of functions with argument arity zero fail when the compiler
is invoked with `-Werror=old-style-definition`. Work around these by
explicitly declaring the parameter list as `void` to clarify that these
functions expect zero arguments.

Signed-off-by: Taylor Blau <me@ttaylorr.com>

* roaring_array.h: declare `ra_get_container()`

This function dates all the way back to 3d12719 (some more untested
code, plus some files that had not been tracked hitherto, 2016-01-19),
but never had a prototype.

This causes compilers building with `-Werror=missing-prototypes` to fail
compilation. Declare a prototype for that function in the corresponding
header accordingly.

Signed-off-by: Taylor Blau <me@ttaylorr.com>

* containers/bitset.h: declare `bitset_container_union_nocard()`

In a similar spirit as the previous commit, declare a function prototype
for `bitset_container_union_nocard()` which rounds out the existing set
of function prototypes for the macro expansion of:

    BITSET_CONTAINER_FN(union, |, _mm256_or_si256, vorrq_u64)

Signed-off-by: Taylor Blau <me@ttaylorr.com>

* containers/bitset.h: declare `bitset_container_intersection_nocard()`

In a similar spirit as the previous commits, declare a function
prototype for `bitset_container_intersection_nocard()` which rounds out
the existing set of function prototypes for the macro expansion of:

    BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64)

Signed-off-by: Taylor Blau <me@ttaylorr.com>

---------

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 include/roaring/containers/bitset.h | 12 ++++++++++++
 include/roaring/isadetection.h      |  2 +-
 include/roaring/portability.h       |  1 +
 include/roaring/roaring_array.h     |  2 ++
 src/bitset.c                        |  2 +-
 src/containers/array.c              |  2 +-
 src/isadetection.c                  | 18 +++++++++---------
 7 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/include/roaring/containers/bitset.h b/include/roaring/containers/bitset.h
index 9b8db510d..f71b7a960 100644
--- a/include/roaring/containers/bitset.h
+++ b/include/roaring/containers/bitset.h
@@ -304,6 +304,12 @@ int bitset_container_union(const bitset_container_t *src_1,
 int bitset_container_union_justcard(const bitset_container_t *src_1,
                                     const bitset_container_t *src_2);
 
+/* Computes the union of bitsets `src_1' and `src_2' into `dst', but does
+ * not update the cardinality. Provided to optimize chained operations. */
+int bitset_container_union_nocard(const bitset_container_t *src_1,
+				  const bitset_container_t *src_2,
+				  bitset_container_t *dst);
+
 /* Computes the union of bitsets `src_1' and `src_2' into `dst', but does not
  * update the cardinality. Provided to optimize chained operations. */
 int bitset_container_or_nocard(const bitset_container_t *src_1,
@@ -332,6 +338,12 @@ int bitset_container_intersection(const bitset_container_t *src_1,
 int bitset_container_intersection_justcard(const bitset_container_t *src_1,
                                            const bitset_container_t *src_2);
 
+/* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does
+ * not update the cardinality. Provided to optimize chained operations. */
+int bitset_container_intersection_nocard(const bitset_container_t *src_1,
+					 const bitset_container_t *src_2,
+					 bitset_container_t *dst);
+
 /* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does
  * not update the cardinality. Provided to optimize chained operations. */
 int bitset_container_and_nocard(const bitset_container_t *src_1,
diff --git a/include/roaring/isadetection.h b/include/roaring/isadetection.h
index 0e0ef0750..446b32dae 100644
--- a/include/roaring/isadetection.h
+++ b/include/roaring/isadetection.h
@@ -34,7 +34,7 @@ enum {
   ROARING_SUPPORTS_AVX2 = 1,
   ROARING_SUPPORTS_AVX512 = 2,
 };
-int croaring_hardware_support();
+int croaring_hardware_support(void);
 #ifdef __cplusplus
 } } }  // extern "C" { namespace roaring { namespace internal {
 #endif
diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index cfaaf143f..b43183a73 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -503,6 +503,7 @@ static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) {
     return *val;
 }
 #elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_NONE
+#include <assert.h>
 typedef uint32_t croaring_refcount_t;
 
 static inline void croaring_refcount_inc(croaring_refcount_t *val) {
diff --git a/include/roaring/roaring_array.h b/include/roaring/roaring_array.h
index 24ce7cad2..ac4941bcd 100644
--- a/include/roaring/roaring_array.h
+++ b/include/roaring/roaring_array.h
@@ -168,6 +168,8 @@ inline void ra_set_container_at_index(
     ra->typecodes[i] = typecode;
 }
 
+container_t *ra_get_container(roaring_array_t *ra, uint16_t x, uint8_t *typecode);
+
 /**
  * If needed, increase the capacity of the array so that it can fit k values
  * (at
diff --git a/src/bitset.c b/src/bitset.c
index c493c0b14..4e39c16ca 100644
--- a/src/bitset.c
+++ b/src/bitset.c
@@ -13,7 +13,7 @@ extern "C" { namespace roaring { namespace internal {
 #endif
 
 /* Create a new bitset. Return NULL in case of failure. */
-bitset_t *bitset_create() {
+bitset_t *bitset_create(void) {
     bitset_t *bitset = NULL;
     /* Allocate the bitset itself. */
     if ((bitset = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) {
diff --git a/src/containers/array.c b/src/containers/array.c
index 4051de714..6c0ffd2c7 100644
--- a/src/containers/array.c
+++ b/src/containers/array.c
@@ -59,7 +59,7 @@ array_container_t *array_container_create_given_capacity(int32_t size) {
 }
 
 /* Create a new array. Return NULL in case of failure. */
-array_container_t *array_container_create() {
+array_container_t *array_container_create(void) {
     return array_container_create_given_capacity(ARRAY_DEFAULT_INIT_SIZE);
 }
 
diff --git a/src/isadetection.c b/src/isadetection.c
index cd56b6983..ce4b55326 100644
--- a/src/isadetection.c
+++ b/src/isadetection.c
@@ -115,7 +115,7 @@ static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
 }
 
 
-static inline uint64_t xgetbv() {
+static inline uint64_t xgetbv(void) {
 #if defined(_MSC_VER)
   return _xgetbv(0);
 #else
@@ -130,7 +130,7 @@ static inline uint64_t xgetbv() {
  * *once* per compilation units. Normally, the CRoaring library is built
  * as one compilation unit.
  */
-static inline uint32_t dynamic_croaring_detect_supported_architectures() {
+static inline uint32_t dynamic_croaring_detect_supported_architectures(void) {
   uint32_t eax, ebx, ecx, edx;
   uint32_t host_isa = 0x0;
   // Can be found on Intel ISA Reference for CPUID
@@ -228,13 +228,13 @@ static inline uint32_t dynamic_croaring_detect_supported_architectures() {
 #if defined(__x86_64__) || defined(_M_AMD64) // x64
 
 #if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP
-static inline uint32_t croaring_detect_supported_architectures() {
+static inline uint32_t croaring_detect_supported_architectures(void) {
     // thread-safe as per the C++11 standard.
     static uint32_t buffer = dynamic_croaring_detect_supported_architectures();
     return buffer;
 }
 #elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C
-static uint32_t croaring_detect_supported_architectures() {
+static uint32_t croaring_detect_supported_architectures(void) {
     // we use an atomic for thread safety
     static _Atomic uint32_t buffer = CROARING_UNINITIALIZED;
     if (buffer == CROARING_UNINITIALIZED) {
@@ -245,7 +245,7 @@ static uint32_t croaring_detect_supported_architectures() {
 }
 #else
 // If we do not have atomics, we do the best we can.
-static inline uint32_t croaring_detect_supported_architectures() {
+static inline uint32_t croaring_detect_supported_architectures(void) {
     static uint32_t buffer = CROARING_UNINITIALIZED;
     if (buffer == CROARING_UNINITIALIZED) {
       buffer = dynamic_croaring_detect_supported_architectures();
@@ -256,17 +256,17 @@ static inline uint32_t croaring_detect_supported_architectures() {
 
 #ifdef ROARING_DISABLE_AVX
 
-int croaring_hardware_support() {
+int croaring_hardware_support(void) {
     return 0;
 }
 
 #elif defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VBMI2__) && defined(__AVX512BITALG__) && defined(__AVX512VPOPCNTDQ__)
-int croaring_hardware_support() {
+int croaring_hardware_support(void) {
     return  ROARING_SUPPORTS_AVX2 | ROARING_SUPPORTS_AVX512;
 }
 #elif defined(__AVX2__)
 
-int croaring_hardware_support() {
+int croaring_hardware_support(void) {
   static int support = 0xFFFFFFF;
   if(support == 0xFFFFFFF) {
     bool avx512_support = false;
@@ -280,7 +280,7 @@ int croaring_hardware_support() {
 }
 #else
 
-int croaring_hardware_support() {
+int croaring_hardware_support(void) {
   static int support = 0xFFFFFFF;
   if(support == 0xFFFFFFF) {
     bool has_avx2 = (croaring_detect_supported_architectures() & CROARING_AVX2) == CROARING_AVX2;

From c6ebe141536283c628b8f9fb82211d66c83a6e8c Mon Sep 17 00:00:00 2001
From: Taylor Blau <ttaylorr@github.com>
Date: Mon, 22 May 2023 21:39:00 -0500
Subject: [PATCH 128/162] portability.h: add a build flag to suppress warnings
 (#481)

When building CRoaring without support for atomics (for instance, in a
pre-C11 environment where <stdatomic.h> is not part of the language
specification), we get a compile-time message that copy-on-write bitmaps
are unavailable.

Having a warning message is useful as a default behavior, since it can
prevent surprises for CRoaring users who expect their bitmaps to be
thread-safe, but aren't for whatever reason.

But in environments where we know that <stdatomic.h> is unavailable, or
we are single-threaded, the message is noise, since we know ahead of
time that CRoaring's bitmaps won't be threadsafe (and are OK with that).

Provide an opt-out build knob (CROARING_SILENT_BUILD) to suppress this
warning for applications that wish to do so.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 include/roaring/portability.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index b43183a73..478ec21f4 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -426,7 +426,9 @@ static inline int roaring_hamming(uint64_t x) {
 #endif // !defined(CROARING_ATOMIC_IMPL)
 
 #if !defined(CROARING_ATOMIC_IMPL)
-  #pragma message ( "No atomic implementation found, copy on write bitmaps will not be threadsafe" )
+  #ifndef CROARING_SILENT_BUILD
+    #pragma message ( "No atomic implementation found, copy on write bitmaps will not be threadsafe" )
+  #endif // CROARING_SILENT_BUILD
   #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_NONE
 #endif
 

From c7b62bb491faae18d0226aa38632f27221ddd67c Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 22 May 2023 22:39:35 -0400
Subject: [PATCH 129/162] Release.

---
 CMakeLists.txt                    | 6 +++---
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 57f4833aa..a399d7385 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,9 +17,9 @@ if(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSIO
 endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 1)
-set(PROJECT_VERSION_MINOR 2)
-set(PROJECT_VERSION_PATCH 0)
-set(ROARING_LIB_VERSION "1.2.0" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_MINOR 1)
+set(PROJECT_VERSION_PATCH 4)
+set(ROARING_LIB_VERSION "1.1.4" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "10" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/doxygen b/doxygen
index 544adf94c..fd581f555 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "1.2.0"
+PROJECT_NUMBER         = "1.1.4"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 3b52edb5c..2f1555757 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "1.2.0"
+#define ROARING_VERSION "1.1.4"
 enum { 
     ROARING_VERSION_MAJOR = 1,
-    ROARING_VERSION_MINOR = 2,
-    ROARING_VERSION_REVISION = 0
+    ROARING_VERSION_MINOR = 1,
+    ROARING_VERSION_REVISION = 4
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 8ac201622410be69d27b81b3e768fed9f46cb1f7 Mon Sep 17 00:00:00 2001
From: Paul Smith <paul@mad-scientist.net>
Date: Thu, 25 May 2023 11:20:03 -0400
Subject: [PATCH 130/162] amalgamation.sh: Sort the C files in the output
 (#482)

Since users may check in the output of the amalgamation, ensure that
the source files are generated in a common order from run to run.

Use a more straightforward invocation of git ls-files.

Fail early if the source path contains whitespace, which will break
the amalgamation script.

Co-authored-by: Paul Smith <paul@mad-scientiset.net>
---
 amalgamation.sh | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/amalgamation.sh b/amalgamation.sh
index e35e5bd6c..545f1f7ed 100755
--- a/amalgamation.sh
+++ b/amalgamation.sh
@@ -5,6 +5,10 @@
 ########################################################################
 SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
 
+case $SCRIPTPATH in
+    (*\ *) echo "Path ($SCRIPTPATH) cannot contain whitespace"; exit 1 ;;
+esac
+
 timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")  # capture to label files with their generation time
 
 function newline {
@@ -76,12 +80,13 @@ $SCRIPTPATH/include/roaring/roaring_array.h
 # has the definitions available from all the header files.  Since the order of
 # the top level declarations doesn't matter after that point, the file list is
 # generated automatically from git-tracked C files in the /src/ directory.
+# Sort them so every run uses the same order.
 #
-ALL_PRIVATE_C=$( ( \
+ALL_PRIVATE_C=$( ( ( \
     [ -d $SCRIPTPATH/.git ] \
         && ( type git >/dev/null 2>&1 ) \
-        && ( git ls-files $SCRIPTPATH/src/*.c $SCRIPTPATH/src/**/*c ) \
-    ) || ( find $SCRIPTPATH/src -name '*.c' ) )
+        && ( git -C $SCRIPTPATH ls-files 'src/*.c' ) \
+    ) || ( find $SCRIPTPATH/src -name '*.c' ) ) | sort )
 # Verify up-front that all the files exist
 #
 for i in ${ALL_PUBLIC_H} ${ALL_PUBLIC_HH} ${ALL_PRIVATE_H} ${ALL_PRIVATE_C}; do

From 0bf71f502951e4c7067995662cb897bae53807d2 Mon Sep 17 00:00:00 2001
From: kangkaisen <kangkaisen@gmail.com>
Date: Mon, 29 May 2023 12:47:28 -0700
Subject: [PATCH 131/162] Update README.md (#483)

Add StarRocks to readme
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 7db6b0335..abdfbca56 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ Bitsets, also called bitmaps, are commonly used as fast data structures. Unfortu
 
 Roaring bitmaps are compressed bitmaps which tend to outperform conventional compressed bitmaps such as WAH, EWAH or Concise.
 They are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
-[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas],  [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [InfluxDB](https://www.influxdata.com), [Pilosa][pilosa], [Bleve](http://www.blevesearch.com), [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The CRoaring library is used in several systems such as [Apache Doris](http://doris.incubator.apache.org). The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing.
+[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas],  [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [InfluxDB](https://www.influxdata.com), [Pilosa][pilosa], [Bleve](http://www.blevesearch.com), [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The CRoaring library is used in several systems such as [Apache Doris](http://doris.incubator.apache.org), [StarRocks](https://github.com/StarRocks/starrocks). The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing.
 
 We published a peer-reviewed article on the design and evaluation of this library:
 

From 6935aefbfa7fbe295c7e5b6c53fb09f8b3452bad Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 30 May 2023 11:53:35 -0400
Subject: [PATCH 132/162] Fixing issue 484 (#485)

* Fixing issue 484

* Further guarding the avx-512 headers.
---
 include/roaring/portability.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index 478ec21f4..85ac27eb6 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -107,7 +107,7 @@ extern "C" {  // portability definitions are in global scope, not a namespace
 
 
-#ifdef CROARING_CLANG_VISUAL_STUDIO
+#if CROARING_CLANG_VISUAL_STUDIO
 
 /**
  * You are not supposed, normally, to include these
@@ -132,6 +132,7 @@ extern "C" {  // portability definitions are in global scope, not a namespace
 #include <avxintrin.h>
 #include <avx2intrin.h>
 #include <wmmintrin.h>
+#if _MSC_VER >= 1920
 // Important: we need the AVX-512 headers:
 #include <avx512fintrin.h>
 #include <avx512dqintrin.h>
@@ -141,6 +142,7 @@ extern "C" {  // portability definitions are in global scope, not a namespace
 #include <avx512vbmiintrin.h>
 #include <avx512vbmi2intrin.h>
 #include <avx512vpopcntdqintrin.h>
+#endif // _MSC_VER >= 1920
 // unfortunately, we may not get _blsr_u64, but, thankfully, clang
 // has it as a macro.
 #ifndef _blsr_u64

From 4ccc9f2b7b65b013405ed282ca4c345d768744a2 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 30 May 2023 11:54:23 -0400
Subject: [PATCH 133/162] New version

---
 CMakeLists.txt                    | 4 ++--
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a399d7385..783b1f620 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 1)
 set(PROJECT_VERSION_MINOR 1)
-set(PROJECT_VERSION_PATCH 4)
-set(ROARING_LIB_VERSION "1.1.4" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 5)
+set(ROARING_LIB_VERSION "1.1.5" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "10" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/doxygen b/doxygen
index fd581f555..dffec5faf 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "1.1.4"
+PROJECT_NUMBER         = "1.1.5"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 2f1555757..cf5df71d0 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "1.1.4"
+#define ROARING_VERSION "1.1.5"
 enum { 
     ROARING_VERSION_MAJOR = 1,
     ROARING_VERSION_MINOR = 1,
-    ROARING_VERSION_REVISION = 4
+    ROARING_VERSION_REVISION = 5
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 73bfce6f064c203199f07a6ce9d3717d66b48274 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Fri, 2 Jun 2023 20:35:35 -0400
Subject: [PATCH 134/162] More explicit target

---
 include/roaring/portability.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index 85ac27eb6..e093c7157 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -340,8 +340,8 @@ static inline int roaring_hamming(uint64_t x) {
 #endif
 
 
-#define CROARING_TARGET_AVX2 CROARING_TARGET_REGION("avx2,bmi,pclmul,lzcnt")
-#define CROARING_TARGET_AVX512 CROARING_TARGET_REGION("bmi2,avx512f,avx512dq,avx512bw,avx512vbmi2,avx512bitalg,avx512vpopcntdq")
+#define CROARING_TARGET_AVX2 CROARING_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt")
+#define CROARING_TARGET_AVX512 CROARING_TARGET_REGION("avx2,bmi,bmi2,pclmul,lzcnt,popcnt,avx512f,avx512dq,avx512bw,avx512vbmi2,avx512bitalg,avx512vpopcntdq")
 #define CROARING_UNTARGET_AVX2 CROARING_UNTARGET_REGION
 #define CROARING_UNTARGET_AVX512 CROARING_UNTARGET_REGION
 

From e8c0bca7d3b53ba9f2e783d506dd34572de2487c Mon Sep 17 00:00:00 2001
From: Salvatore Previti <roorback@gmail.com>
Date: Mon, 5 Jun 2023 19:24:12 +0100
Subject: [PATCH 135/162] Add roaring_bitmap_deserialize_safe (#486)

* Add roaring_bitmap_deserialize_safe

* minor

* fix
---
 include/roaring/roaring.h | 14 ++++++++++++
 src/roaring.c             | 46 +++++++++++++++++++++++++++++++++++++++
 tests/toplevel_unit.c     | 26 ++++++++++++++++++++++
 3 files changed, 86 insertions(+)

diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h
index 9afeddb4d..58781544e 100644
--- a/include/roaring/roaring.h
+++ b/include/roaring/roaring.h
@@ -514,6 +514,20 @@ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);
  */
 roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf);
 
+/**
+ * Use with `roaring_bitmap_serialize()`.
+ *
+ * (See `roaring_bitmap_portable_deserialize_safe()` if you want a format that's
+ * compatible with Java and Go implementations).
+ *
+ * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x),
+ * the data format is going to be big-endian and not compatible with little-endian systems.
+ * 
+ * The difference with `roaring_bitmap_deserialize()` is that this function checks that the input buffer
+ * is a valid bitmap.  If the buffer is too small, NULL is returned.
+ */
+roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf, size_t maxbytes);
+
 /**
  * How many bytes are required to serialize this bitmap (NOT compatible
  * with Java and Go versions)
diff --git a/src/roaring.c b/src/roaring.c
index 4bdb94fa7..800521e58 100644
--- a/src/roaring.c
+++ b/src/roaring.c
@@ -1463,9 +1463,12 @@ roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) {
     if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) {
         /* This looks like a compressed set of uint32_t elements */
         uint32_t card;
+
         memcpy(&card, bufaschar + 1, sizeof(uint32_t));
+
         const uint32_t *elems =
             (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t));
+        
         roaring_bitmap_t *bitmap = roaring_bitmap_create();
         if (bitmap == NULL) {
             return NULL;
@@ -1478,12 +1481,55 @@ roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) {
             roaring_bitmap_add_bulk(bitmap, &context, elem);
         }
         return bitmap;
+
     } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) {
         return roaring_bitmap_portable_deserialize(bufaschar + 1);
     } else
         return (NULL);
 }
 
+// Bounds-checked counterpart of roaring_bitmap_deserialize(). The array
+// format is validated against `maxbytes` before any element is read; the
+// container format is handed to roaring_bitmap_portable_deserialize_safe().
+// Returns NULL when `buf` is NULL, the array data does not fit in
+// `maxbytes`, the format tag is unknown, or the bitmap allocation fails.
+roaring_bitmap_t* roaring_bitmap_deserialize_safe(const void *buf, size_t maxbytes) {
+    if (buf == NULL || maxbytes < 1) {
+        return NULL;
+    }
+    const char *bufaschar = (const char *)buf;
+    if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) {
+        /* This looks like a compressed set of uint32_t elements */
+        const size_t header = 1 + sizeof(uint32_t);  // format tag + cardinality
+        if (maxbytes < header) {
+            return NULL;
+        }
+        uint32_t card;
+        memcpy(&card, bufaschar + 1, sizeof(uint32_t));
+        // Check the buffer is big enough to contain card uint32_t elements;
+        // divide, since card * sizeof(uint32_t) can wrap a 32-bit size_t.
+        if ((maxbytes - header) / sizeof(uint32_t) < card) {
+            return NULL;
+        }
+        // Keep a byte pointer: the elements may not be aligned for uint32_t.
+        const char *elems = bufaschar + header;
+        roaring_bitmap_t *bitmap = roaring_bitmap_create();
+        if (bitmap == NULL) {
+            return NULL;
+        }
+        roaring_bulk_context_t context = {0};
+        for (uint32_t i = 0; i < card; i++) {
+            uint32_t elem;
+            memcpy(&elem, elems + (size_t)i * sizeof(elem), sizeof(elem));
+            roaring_bitmap_add_bulk(bitmap, &context, elem);
+        }
+        return bitmap;
+    } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) {
+        return roaring_bitmap_portable_deserialize_safe(bufaschar + 1, maxbytes - 1);
+    }
+    return NULL;
+}
+
 bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator,
                      void *ptr) {
     const roaring_array_t *ra = &r->high_low_container;
diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index e1c15a096..274a20bbb 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -1407,6 +1407,23 @@ DEFINE_TEST(test_serialize) {
     r2 = roaring_bitmap_deserialize(serialized);
     assert_true(roaring_bitmap_equals(r1, r2));
 
+    // Check that roaring_bitmap_deserialize_safe fails on invalid length
+
+    assert_null(roaring_bitmap_deserialize_safe(serialized, 0));
+    assert_null(roaring_bitmap_deserialize_safe(serialized, serialize_len - 1));
+
+    // Check that roaring_bitmap_deserialize_safe succeed with valid length
+
+    roaring_bitmap_t *t_safe = roaring_bitmap_deserialize_safe(serialized, serialize_len);
+    assert_true(roaring_bitmap_equals(r1, t_safe));
+    roaring_bitmap_free(t_safe);
+
+    // Check that roaring_bitmap_deserialize_safe succeed with larger length
+
+    t_safe = roaring_bitmap_deserialize_safe(serialized, serialize_len + 10);
+    assert_true(roaring_bitmap_equals(r1, t_safe));
+    roaring_bitmap_free(t_safe);
+
     free(serialized);
     roaring_bitmap_free(r1);
     roaring_bitmap_free(r2);
@@ -1460,6 +1477,7 @@ DEFINE_TEST(test_serialize) {
 
     assert_true(array_equals(arr1, card1, arr2, card2));
     assert_true(roaring_bitmap_equals(r1, r2));
+
     free(arr1);
     free(arr2);
     free(serialized);
@@ -1473,6 +1491,14 @@ DEFINE_TEST(test_serialize) {
     uint32_t size = roaring_bitmap_serialize(old_bm, buff);
     assert_int_equal(size, roaring_bitmap_size_in_bytes(old_bm));
     roaring_bitmap_t *new_bm = roaring_bitmap_deserialize(buff);
+
+    // Check that roaring_bitmap_deserialize_safe fails on invalid length
+    assert_null(roaring_bitmap_deserialize_safe(buff, size - 1));
+    // Check that roaring_bitmap_deserialize_safe succeed with valid length
+    t_safe = roaring_bitmap_deserialize_safe(buff, size);
+    assert_true(roaring_bitmap_equals(new_bm, t_safe));
+    roaring_bitmap_free(t_safe);
+
     free(buff);
     assert_true((unsigned int)roaring_bitmap_get_cardinality(old_bm) ==
                 (unsigned int)roaring_bitmap_get_cardinality(new_bm));

From 4c5e7d8d1d76e1f6f086e7391c2376e4de227ee6 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Sun, 11 Jun 2023 11:35:22 -0400
Subject: [PATCH 136/162] Adding a few missing extern. (#488)

---
 src/bitset.c  | 15 +++++++++++++++
 src/roaring.c |  2 ++
 2 files changed, 17 insertions(+)

diff --git a/src/bitset.c b/src/bitset.c
index 4e39c16ca..b4b62ba3a 100644
--- a/src/bitset.c
+++ b/src/bitset.c
@@ -12,6 +12,21 @@
 extern "C" { namespace roaring { namespace internal {
 #endif
 
+extern inline void bitset_print(const bitset_t *b);
+extern inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator,
+                                   void *ptr);
+extern inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer,
+                                 size_t capacity, size_t *startfrom);
+extern inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer,
+                                 size_t capacity, size_t *startfrom);
+extern inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag);
+extern inline void bitset_set(bitset_t *bitset, size_t i);
+extern inline size_t bitset_size_in_words(const bitset_t *bitset);
+extern inline size_t bitset_size_in_bits(const bitset_t *bitset);
+extern inline size_t bitset_size_in_bytes(const bitset_t *bitset);
+
+
+extern inline bool bitset_get(const bitset_t *bitset, size_t i);
 /* Create a new bitset. Return NULL in case of failure. */
 bitset_t *bitset_create(void) {
     bitset_t *bitset = NULL;
diff --git a/src/roaring.c b/src/roaring.c
index 800521e58..104bc590d 100644
--- a/src/roaring.c
+++ b/src/roaring.c
@@ -21,8 +21,10 @@ extern "C" { namespace roaring { namespace api {
 #define CROARING_SERIALIZATION_ARRAY_UINT32 1
 #define CROARING_SERIALIZATION_CONTAINER 2
 
+extern inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r);
 extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r);
 extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow);
+extern inline roaring_bitmap_t *roaring_bitmap_create(void);
 
 static inline bool is_cow(const roaring_bitmap_t *r) {
     return r->high_low_container.flags & ROARING_FLAG_COW;

From b5939ab6e683934a7f80bcf905725058d6550fca Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Sun, 11 Jun 2023 11:36:01 -0400
Subject: [PATCH 137/162] Version bump

---
 CMakeLists.txt                    | 4 ++--
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 783b1f620..5845830cb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 1)
 set(PROJECT_VERSION_MINOR 1)
-set(PROJECT_VERSION_PATCH 5)
-set(ROARING_LIB_VERSION "1.1.5" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 6)
+set(ROARING_LIB_VERSION "1.1.6" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "10" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/doxygen b/doxygen
index dffec5faf..7dce40baf 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "1.1.5"
+PROJECT_NUMBER         = "1.1.6"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index cf5df71d0..680dd611b 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "1.1.5"
+#define ROARING_VERSION "1.1.6"
 enum { 
     ROARING_VERSION_MAJOR = 1,
     ROARING_VERSION_MINOR = 1,
-    ROARING_VERSION_REVISION = 5
+    ROARING_VERSION_REVISION = 6
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 82e3db5bffd6e2d53f1c11499587ab66141db457 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Sun, 11 Jun 2023 11:36:49 -0400
Subject: [PATCH 138/162] Version bump.

---
 CMakeLists.txt                    | 8 ++++----
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 6 +++---
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5845830cb..420f4c492 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,10 +17,10 @@ if(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSIO
 endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 1)
-set(PROJECT_VERSION_MINOR 1)
-set(PROJECT_VERSION_PATCH 6)
-set(ROARING_LIB_VERSION "1.1.6" CACHE STRING "Roaring library version")
-set(ROARING_LIB_SOVERSION "10" CACHE STRING "Roaring library soversion")
+set(PROJECT_VERSION_MINOR 2)
+set(PROJECT_VERSION_PATCH 0)
+set(ROARING_LIB_VERSION "1.2.0" CACHE STRING "Roaring library version")
+set(ROARING_LIB_SOVERSION "11" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
 if(NOT ROARING_EXCEPTIONS)
diff --git a/doxygen b/doxygen
index 7dce40baf..544adf94c 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "1.1.6"
+PROJECT_NUMBER         = "1.2.0"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 680dd611b..3b52edb5c 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "1.1.6"
+#define ROARING_VERSION "1.2.0"
 enum { 
     ROARING_VERSION_MAJOR = 1,
-    ROARING_VERSION_MINOR = 1,
-    ROARING_VERSION_REVISION = 6
+    ROARING_VERSION_MINOR = 2,
+    ROARING_VERSION_REVISION = 0
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From e29535bea457b2c3267b93bec196256ad75eb27b Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Sun, 11 Jun 2023 11:53:17 -0400
Subject: [PATCH 139/162] This warning is just annoying.

---
 include/roaring/portability.h | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index e093c7157..84f09f714 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -427,13 +427,6 @@ static inline int roaring_hamming(uint64_t x) {
   #endif
 #endif // !defined(CROARING_ATOMIC_IMPL)
 
-#if !defined(CROARING_ATOMIC_IMPL)
-  #ifndef CROARING_SILENT_BUILD
-    #pragma message ( "No atomic implementation found, copy on write bitmaps will not be threadsafe" )
-  #endif // CROARING_SILENT_BUILD
-  #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_NONE
-#endif
-
 #if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C
 #include <stdatomic.h>
 typedef _Atomic(uint32_t) croaring_refcount_t;

From ed8c0b99bfc14c29817b10656f512c81f4a514d0 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Sun, 11 Jun 2023 17:12:25 -0400
Subject: [PATCH 140/162] Trimming duplicated line

---
 src/bitset.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/bitset.c b/src/bitset.c
index b4b62ba3a..6384f0b51 100644
--- a/src/bitset.c
+++ b/src/bitset.c
@@ -17,8 +17,6 @@ extern inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator,
                                    void *ptr);
 extern inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer,
                                  size_t capacity, size_t *startfrom);
-extern inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer,
-                                 size_t capacity, size_t *startfrom);
 extern inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag);
 extern inline void bitset_set(bitset_t *bitset, size_t i);
 extern inline size_t bitset_size_in_words(const bitset_t *bitset);

From 0a432c068a8b0ab8a3fcbcaed46deb70cc3dd9ad Mon Sep 17 00:00:00 2001
From: Zachary Dremann <dremann@gmail.com>
Date: Mon, 12 Jun 2023 07:54:28 -0400
Subject: [PATCH 141/162] Use `inline` in headers, `extern inline` in c files
 (#490)

Don't use `static inline` for functions meant to be part of the API, so
they should always get an external linkage version. `static inline` and
`extern inline` shouldn't even work together, but it seems they do.

Also, add some previously missing `extern` declarations and group the
extern declarations together, as the rest already are.
---
 include/roaring/bitset/bitset.h | 24 ++++++++++++------------
 include/roaring/roaring.h       | 17 ++++++++---------
 src/bitset.c                    |  3 ++-
 src/roaring.c                   |  5 ++---
 4 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/include/roaring/bitset/bitset.h b/include/roaring/bitset/bitset.h
index f9707b351..011702bc8 100644
--- a/include/roaring/bitset/bitset.h
+++ b/include/roaring/bitset/bitset.h
@@ -56,17 +56,17 @@ bitset_t *bitset_copy(const bitset_t *bitset);
 bool bitset_resize(bitset_t *bitset, size_t newarraysize, bool padwithzeroes);
 
 /* returns how many bytes of memory the backend buffer uses */
-static inline size_t bitset_size_in_bytes(const bitset_t *bitset) {
+inline size_t bitset_size_in_bytes(const bitset_t *bitset) {
     return bitset->arraysize * sizeof(uint64_t);
 }
 
 /* returns how many bits can be accessed */
-static inline size_t bitset_size_in_bits(const bitset_t *bitset) {
+inline size_t bitset_size_in_bits(const bitset_t *bitset) {
     return bitset->arraysize * 64;
 }
 
 /* returns how many words (64-bit) of memory the backend buffer uses */
-static inline size_t bitset_size_in_words(const bitset_t *bitset) {
+inline size_t bitset_size_in_words(const bitset_t *bitset) {
     return bitset->arraysize;
 }
 
@@ -88,7 +88,7 @@ void bitset_shift_right(bitset_t *bitset, size_t s);
 
 /* Set the ith bit. Attempts to resize the bitset if needed (may silently fail)
  */
-static inline void bitset_set(bitset_t *bitset, size_t i) {
+inline void bitset_set(bitset_t *bitset, size_t i) {
     size_t shiftedi = i / 64;
     if (shiftedi >= bitset->arraysize) {
         if (!bitset_grow(bitset, shiftedi + 1)) {
@@ -100,7 +100,7 @@ static inline void bitset_set(bitset_t *bitset, size_t i) {
 
 /* Set the ith bit to the specified value. Attempts to resize the bitset if
  * needed (may silently fail) */
-static inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag) {
+inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag) {
     size_t shiftedi = i / 64;
     uint64_t mask = ((uint64_t)1) << (i % 64);
     uint64_t dynmask = ((uint64_t)flag) << (i % 64);
@@ -116,7 +116,7 @@ static inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag) {
 }
 
 /* Get the value of the ith bit.  */
-static inline bool bitset_get(const bitset_t *bitset, size_t i) {
+inline bool bitset_get(const bitset_t *bitset, size_t i) {
     size_t shiftedi = i / 64;
     if (shiftedi >= bitset->arraysize) {
         return false;
@@ -184,7 +184,7 @@ size_t bitset_symmetric_difference_count(const bitset_t *CBITSET_RESTRICT b1,
     //.....
   }
   */
-static inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i) {
+inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i) {
     size_t x = *i / 64;
     if (x >= bitset->arraysize) {
         return false;
@@ -216,8 +216,8 @@ static inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i) {
     //.....
   }
   */
-static inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer,
-                                 size_t capacity, size_t *startfrom) {
+inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer,
+                                   size_t capacity, size_t *startfrom) {
     if (capacity == 0) return 0;  // sanity check
     size_t x = *startfrom / 64;
     if (x >= bitset->arraysize) {
@@ -252,8 +252,8 @@ static inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer
 typedef bool (*bitset_iterator)(size_t value, void *param);
 
 // return true if uninterrupted
-static inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator,
-                                   void *ptr) {
+inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator,
+                            void *ptr) {
     size_t base = 0;
     for (size_t i = 0; i < b->arraysize; ++i) {
         uint64_t w = b->array[i];
@@ -268,7 +268,7 @@ static inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator,
     return true;
 }
 
-static inline void bitset_print(const bitset_t *b) {
+inline void bitset_print(const bitset_t *b) {
     printf("{");
     for (size_t i = 0; bitset_next_set_bit(b, &i); i++) {
         printf("%zu, ", i);
diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h
index 58781544e..dd4daab5a 100644
--- a/include/roaring/roaring.h
+++ b/include/roaring/roaring.h
@@ -35,7 +35,7 @@ roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap);
  * Returns NULL if the allocation fails.
  * Client is responsible for calling `roaring_bitmap_free()`.
  */
-static inline roaring_bitmap_t *roaring_bitmap_create(void)
+inline roaring_bitmap_t *roaring_bitmap_create(void)
   { return roaring_bitmap_create_with_capacity(0); }
 
 /**
@@ -50,7 +50,7 @@ bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap);
  * The bitmap will be in a "clear" state, with no auxiliary allocations.
  * Since this performs no allocations, the function will not fail.
  */
-static inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r)
+inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r)
   { roaring_bitmap_init_with_capacity(r, 0); }
 
 /**
@@ -74,11 +74,10 @@ roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals);
  * do so for all of your bitmaps, since interactions between bitmaps with and
  * without COW is unsafe.
  */
-static inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r) {
+inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r) {
     return r->high_low_container.flags & ROARING_FLAG_COW;
 }
-static inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r,
-                                                    bool cow) {
+inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow) {
     if (cow) {
         r->high_low_container.flags |= ROARING_FLAG_COW;
     } else {
@@ -332,8 +331,8 @@ void roaring_bitmap_add_range_closed(roaring_bitmap_t *r,
 /**
  * Add all values in range [min, max)
  */
-static inline void roaring_bitmap_add_range(roaring_bitmap_t *r,
-                                            uint64_t min, uint64_t max) {
+inline void roaring_bitmap_add_range(roaring_bitmap_t *r,
+                                     uint64_t min, uint64_t max) {
     if(max <= min) return;
     roaring_bitmap_add_range_closed(r, (uint32_t)min, (uint32_t)(max - 1));
 }
@@ -352,8 +351,8 @@ void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r,
 /**
  * Remove all values in range [min, max)
  */
-static inline void roaring_bitmap_remove_range(roaring_bitmap_t *r,
-                                               uint64_t min, uint64_t max) {
+inline void roaring_bitmap_remove_range(roaring_bitmap_t *r,
+                                        uint64_t min, uint64_t max) {
     if(max <= min) return;
     roaring_bitmap_remove_range_closed(r, (uint32_t)min, (uint32_t)(max - 1));
 }
diff --git a/src/bitset.c b/src/bitset.c
index 6384f0b51..03337951f 100644
--- a/src/bitset.c
+++ b/src/bitset.c
@@ -18,13 +18,14 @@ extern inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator,
 extern inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer,
                                  size_t capacity, size_t *startfrom);
 extern inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag);
+extern inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i);
 extern inline void bitset_set(bitset_t *bitset, size_t i);
+extern inline bool bitset_get(const bitset_t *bitset, size_t i);
 extern inline size_t bitset_size_in_words(const bitset_t *bitset);
 extern inline size_t bitset_size_in_bits(const bitset_t *bitset);
 extern inline size_t bitset_size_in_bytes(const bitset_t *bitset);
 
 
-extern inline bool bitset_get(const bitset_t *bitset, size_t i);
 /* Create a new bitset. Return NULL in case of failure. */
 bitset_t *bitset_create(void) {
     bitset_t *bitset = NULL;
diff --git a/src/roaring.c b/src/roaring.c
index 104bc590d..1d8c5161d 100644
--- a/src/roaring.c
+++ b/src/roaring.c
@@ -25,6 +25,8 @@ extern inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r);
 extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r);
 extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow);
 extern inline roaring_bitmap_t *roaring_bitmap_create(void);
+extern inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, uint64_t max);
+extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min, uint64_t max);
 
 static inline bool is_cow(const roaring_bitmap_t *r) {
     return r->high_low_container.flags & ROARING_FLAG_COW;
@@ -323,9 +325,6 @@ void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, uint3
     }
 }
 
-extern inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, uint64_t max);
-extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min, uint64_t max);
-
 void roaring_bitmap_printf(const roaring_bitmap_t *r) {
     const roaring_array_t *ra = &r->high_low_container;
 

From 5a40011acaf15de00142bb8675a9a8d619a6dc95 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 12 Jun 2023 07:57:04 -0400
Subject: [PATCH 142/162] Version bump

---
 CMakeLists.txt                    | 6 +++---
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 420f4c492..0bc46919e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,10 +17,10 @@ if(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSIO
 endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 1)
-set(PROJECT_VERSION_MINOR 2)
+set(PROJECT_VERSION_MINOR 3)
 set(PROJECT_VERSION_PATCH 0)
-set(ROARING_LIB_VERSION "1.2.0" CACHE STRING "Roaring library version")
-set(ROARING_LIB_SOVERSION "11" CACHE STRING "Roaring library soversion")
+set(ROARING_LIB_VERSION "1.3.0" CACHE STRING "Roaring library version")
+set(ROARING_LIB_SOVERSION "12" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
 if(NOT ROARING_EXCEPTIONS)
diff --git a/doxygen b/doxygen
index 544adf94c..a18002df4 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "1.2.0"
+PROJECT_NUMBER         = "1.3.0"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 3b52edb5c..55b836a18 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "1.2.0"
+#define ROARING_VERSION "1.3.0"
 enum { 
     ROARING_VERSION_MAJOR = 1,
-    ROARING_VERSION_MINOR = 2,
+    ROARING_VERSION_MINOR = 3,
     ROARING_VERSION_REVISION = 0
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 63e22ab72c16aff79da55d00d1189682fdf3a229 Mon Sep 17 00:00:00 2001
From: mwish <1506118561@qq.com>
Date: Sun, 18 Jun 2023 22:21:04 +0800
Subject: [PATCH 143/162] C++ Wrapper for Roaring32 `*Bulk` (#492)

* [Add] Adding basic Wrapper for Bulk

* fix comment

* add bulk contains

* fix comment: remove wrapper postfix, and make BulkContext only movable
---
 cpp/roaring.hh            | 46 +++++++++++++++++++++++++++++++++++++++
 include/roaring/roaring.h |  4 ++--
 tests/cpp_unit.cpp        | 28 ++++++++++++++++++++++++
 3 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/cpp/roaring.hh b/cpp/roaring.hh
index 1fd7a4655..f14150baf 100644
--- a/cpp/roaring.hh
+++ b/cpp/roaring.hh
@@ -42,6 +42,28 @@ namespace roaring {
 
 class RoaringSetBitForwardIterator;
 
+/**
+ * A bit of context usable with `*Bulk()` functions.
+ *
+ * A context may only be used with a single bitmap, and any modification to a bitmap
+ * (other than modifications performed with `Bulk()` functions with the context
+ * passed) will invalidate any contexts associated with that bitmap.
+ */
+class BulkContext {
+   public:
+    friend class Roaring;
+    using roaring_bitmap_bulk_context_t = api::roaring_bulk_context_t;
+    BulkContext() : context_{nullptr, 0, 0, 0} {}
+
+    BulkContext(const BulkContext&) = delete;
+    BulkContext& operator=(const BulkContext&) = delete;
+    BulkContext(BulkContext&&) noexcept = default;
+    BulkContext& operator=(BulkContext&&) noexcept = default;
+
+   private:
+    roaring_bitmap_bulk_context_t context_;
+};
+
 class Roaring {
     typedef api::roaring_bitmap_t roaring_bitmap_t;  // class-local name alias
 
@@ -166,6 +188,30 @@ public:
         api::roaring_bitmap_add_many(&roaring, n_args, vals);
     }
 
+    /**
+     * Add value val, using context from a previous insert for speed
+     * optimization.
+     *
+     * `context` will be used to store information between calls to make bulk
+     * operations faster. `context` should be default-initialized before the
+     * first call to this function.
+     */
+    void addBulk(BulkContext &context, uint32_t x) noexcept {
+        api::roaring_bitmap_add_bulk(&roaring, &context.context_, x);
+    }
+
+    /**
+     * Check if item x is present, using context from a previous insert or search
+     * for speed optimization.
+     *
+     * `context` will be used to store information between calls to make bulk
+     * operations faster. `context` should be default-initialized before the
+     * first call to this function.
+     */
+    bool containsBulk(BulkContext& context, uint32_t x) const noexcept {
+        return api::roaring_bitmap_contains_bulk(&roaring, &context.context_, x);
+    }
+
     /**
      * Remove value x
      */
diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h
index dd4daab5a..fdf13823a 100644
--- a/include/roaring/roaring.h
+++ b/include/roaring/roaring.h
@@ -383,8 +383,8 @@ bool roaring_bitmap_contains_range(const roaring_bitmap_t *r,
                                    uint64_t range_end);
 
 /**
- * Check if an items is present, using context from a previous insert for speed
- * optimization.
+ * Check if an items is present, using context from a previous insert or search
+ * for speed optimization.
  *
  * `context` will be used to store information between calls to make bulk
  * operations faster. `*context` should be zero-initialized before the first
diff --git a/tests/cpp_unit.cpp b/tests/cpp_unit.cpp
index b5abbe44d..ecec3c83d 100644
--- a/tests/cpp_unit.cpp
+++ b/tests/cpp_unit.cpp
@@ -757,6 +757,32 @@ DEFINE_TEST(test_cpp_add_range) {
     }
 }
 
+DEFINE_TEST(test_cpp_add_bulk) {
+    std::vector<uint32_t> values = {9999, 123, 0xFFFFFFFF, 0xFFFFFFF7, 9999};
+    Roaring r1;
+    Roaring r2;
+    roaring::BulkContext bulk_context;
+    for (const auto value : values) {
+        r1.addBulk(bulk_context, value);
+        r2.add(value);
+        assert_true(r1 == r2);
+    }
+}
+
+DEFINE_TEST(test_cpp_contains_bulk) {
+    std::vector<uint32_t> values_exists = {9999, 123, 0xFFFFFFFF, 0xFFFFFFF7};
+    std::vector<uint32_t> values_not_exists = {10, 12, 2000, 0xFFFFFFF, 0xFFFFFFF9, 2048};
+    Roaring r;
+    r.addMany(values_exists.size(), values_exists.data());
+    roaring::BulkContext bulk_context;
+    for (const auto value: values_exists) {
+        assert_true(r.containsBulk(bulk_context, value));
+    }
+    for (const auto value: values_not_exists) {
+        assert_false(r.containsBulk(bulk_context, value));
+    }
+}
+
 DEFINE_TEST(test_cpp_remove_range) {
     {
         // min < r1.minimum, max > r1.maximum
@@ -1974,6 +2000,8 @@ int main() {
         cmocka_unit_test(test_cpp_add_many),
         cmocka_unit_test(test_cpp_add_many_64),
         cmocka_unit_test(test_cpp_add_range_closed_combinatoric_64),
+        cmocka_unit_test(test_cpp_add_bulk),
+        cmocka_unit_test(test_cpp_contains_bulk),
         cmocka_unit_test(test_cpp_remove_range_closed_64),
         cmocka_unit_test(test_cpp_remove_range_64),
         cmocka_unit_test(test_run_compression_cpp_64_true),

From 7d5a67b7d2cbf820cf05b2a47d4bac03202922da Mon Sep 17 00:00:00 2001
From: Zachary Dremann <dremann@gmail.com>
Date: Sat, 1 Jul 2023 13:43:20 -0400
Subject: [PATCH 144/162] Simplify roaring64map rank (#494)

Use a single `lower_bound` search of the map, rather than a `find`
followed, on a miss, by a separate `lower_bound` call.
---
 cpp/roaring64map.hh | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh
index 0ab1251d0..6caea14a7 100644
--- a/cpp/roaring64map.hh
+++ b/cpp/roaring64map.hh
@@ -1075,19 +1075,15 @@ public:
      */
     uint64_t rank(uint64_t x) const {
         uint64_t result = 0;
-        auto roaring_destination = roarings.find(highBytes(x));
-        if (roaring_destination != roarings.cend()) {
-            for (auto roaring_iter = roarings.cbegin();
-                 roaring_iter != roaring_destination; ++roaring_iter) {
-                result += roaring_iter->second.cardinality();
-            }
-            result += roaring_destination->second.rank(lowBytes(x));
-            return result;
+        // Find the first bitmap >= x's bucket. If that is the bucket x would be in, find it's rank in that bucket.
+        // Either way, we're left with a range of all buckets strictly smaller than x's bucket, add all their
+        // cardinalities together.
+        auto end = roarings.lower_bound(highBytes(x));
+        if (end != roarings.cend() && end->first == highBytes(x)) {
+            result += end->second.rank(lowBytes(x));
         }
-        roaring_destination = roarings.lower_bound(highBytes(x));
-        for (auto roaring_iter = roarings.cbegin();
-             roaring_iter != roaring_destination; ++roaring_iter) {
-            result += roaring_iter->second.cardinality();
+        for (auto iter = roarings.cbegin(); iter != end; ++iter) {
+            result += iter->second.cardinality();
         }
         return result;
     }
@@ -1556,10 +1552,10 @@ private:
     typedef std::map<uint32_t, Roaring> roarings_t;
     roarings_t roarings{}; // The empty constructor silences warnings from pedantic static analyzers.
     bool copyOnWrite{false};
-    static uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); }
-    static uint32_t lowBytes(const uint64_t in) { return uint32_t(in); }
-    static uint64_t uniteBytes(const uint32_t highBytes,
-                               const uint32_t lowBytes) {
+    static constexpr uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); }
+    static constexpr uint32_t lowBytes(const uint64_t in) { return uint32_t(in); }
+    static constexpr uint64_t uniteBytes(const uint32_t highBytes,
+                                         const uint32_t lowBytes) {
         return (uint64_t(highBytes) << 32) | uint64_t(lowBytes);
     }
     // this is needed to tolerate gcc's C++11 libstdc++ lacking emplace

From cc34c6dc23b2eab9f6531fed9c082e6225c6f83e Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 4 Jul 2023 15:12:19 -0400
Subject: [PATCH 145/162] Removing a static qualifier (#496)

---
 include/roaring/portability.h | 2 +-
 tests/mixed_container_unit.c  | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index 84f09f714..b787f1b15 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -180,7 +180,7 @@ extern "C" {  // portability definitions are in global scope, not a namespace
 
 /* wrappers for Visual Studio built-ins that look like gcc built-ins __builtin_ctzll */
 /* result might be undefined when input_num is zero */
-static inline int roaring_trailing_zeroes(unsigned long long input_num) {
+inline int roaring_trailing_zeroes(unsigned long long input_num) {
     unsigned long index;
 #ifdef _WIN64  // highly recommended!!!
     _BitScanForward64(&index, input_num);
diff --git a/tests/mixed_container_unit.c b/tests/mixed_container_unit.c
index 72964eba7..cfcaed231 100644
--- a/tests/mixed_container_unit.c
+++ b/tests/mixed_container_unit.c
@@ -1482,7 +1482,6 @@ DEFINE_TEST(array_negation_range_test3) {
  * sparse */
 static int bitset_negation_range_tests(int sparsity, int r_start, int r_end,
                                        bool is_bitset, bool inplace) {
-    int ctr = 0;
     bitset_container_t* BI = bitset_container_create();
     container_t* BO;
     bool result_is_bitset;
@@ -1490,7 +1489,6 @@ static int bitset_negation_range_tests(int sparsity, int r_start, int r_end,
 
     for (int x = 0; x < (1 << 16); x++) {
         if (x % sparsity) bitset_container_add(BI, (uint16_t)x);
-        ++ctr;
     }
 
     for (int x = 0; x < (1 << 16); x++) {

From 31067e3130d8c41aace05e62faa4f809d9ef4cd3 Mon Sep 17 00:00:00 2001
From: Zachary Dremann <dremann@gmail.com>
Date: Tue, 4 Jul 2023 15:12:35 -0400
Subject: [PATCH 146/162] Add a function to do internal validations of the
 bitmap, intended for testing (#493)

* Add a "validate" function, which verifies a bitmap

Verifies that assumptions which should always be true continue to hold:
e.g. containers are in sorted order, array/run containers contain strictly
increasing numbers, bitsets' precomputed cardinality is correct, etc.

* Avoid overflow when validating runs which include UINT16_MAX

* Check if size GT allocation size

* Start checking array after the first item

* Correct a grammar mistake
---
 include/roaring/containers/array.h      |  2 +
 include/roaring/containers/bitset.h     |  2 +
 include/roaring/containers/containers.h |  3 ++
 include/roaring/containers/run.h        |  2 +
 include/roaring/portability.h           |  8 +--
 include/roaring/roaring.h               | 10 ++++
 src/containers/array.c                  | 40 ++++++++++++++
 src/containers/bitset.c                 | 20 +++++++
 src/containers/containers.c             | 37 +++++++++++++
 src/containers/run.c                    | 48 +++++++++++++++++
 src/roaring.c                           | 70 +++++++++++++++++++++++++
 tests/test.h                            |  6 +++
 tests/toplevel_unit.c                   | 26 +++++++++
 13 files changed, 270 insertions(+), 4 deletions(-)

diff --git a/include/roaring/containers/array.h b/include/roaring/containers/array.h
index d06a5fe83..3070d6e33 100644
--- a/include/roaring/containers/array.h
+++ b/include/roaring/containers/array.h
@@ -158,6 +158,8 @@ void array_container_printf(const array_container_t *v);
 void array_container_printf_as_uint32_array(const array_container_t *v,
                                             uint32_t base);
 
+bool array_container_validate(const array_container_t *v, const char **reason);
+
 /**
  * Return the serialized size in bytes of a container having cardinality "card".
  */
diff --git a/include/roaring/containers/bitset.h b/include/roaring/containers/bitset.h
index f71b7a960..a27e715ae 100644
--- a/include/roaring/containers/bitset.h
+++ b/include/roaring/containers/bitset.h
@@ -413,6 +413,8 @@ void bitset_container_printf(const bitset_container_t *v);
 void bitset_container_printf_as_uint32_array(const bitset_container_t *v,
                                              uint32_t base);
 
+bool bitset_container_validate(const bitset_container_t *v, const char **reason);
+
 /**
  * Return the serialized size in bytes of a container.
  */
diff --git a/include/roaring/containers/containers.h b/include/roaring/containers/containers.h
index 3588fc49e..d011cc02e 100644
--- a/include/roaring/containers/containers.h
+++ b/include/roaring/containers/containers.h
@@ -435,6 +435,9 @@ void container_printf(const container_t *container, uint8_t typecode);
 void container_printf_as_uint32_array(const container_t *container,
                                       uint8_t typecode, uint32_t base);
 
+bool container_internal_validate(const container_t *container,
+                                 uint8_t typecode, const char **reason);
+
 /**
  * Checks whether a container is not empty, requires a  typecode
  */
diff --git a/include/roaring/containers/run.h b/include/roaring/containers/run.h
index 85deb5767..f24a579a3 100644
--- a/include/roaring/containers/run.h
+++ b/include/roaring/containers/run.h
@@ -435,6 +435,8 @@ void run_container_printf(const run_container_t *v);
 void run_container_printf_as_uint32_array(const run_container_t *v,
                                           uint32_t base);
 
+bool run_container_validate(const run_container_t *run, const char **reason);
+
 /**
  * Return the serialized size in bytes of a container having "num_runs" runs.
  */
diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index b787f1b15..162d49016 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -452,7 +452,7 @@ static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
     return is_zero;
 }
 
-static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) {
+static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
     return atomic_load_explicit(val, memory_order_relaxed);
 }
 #elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP
@@ -472,7 +472,7 @@ static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
     return is_zero;
 }
 
-static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) {
+static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
     return val->load(std::memory_order_relaxed);
 }
 #elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C_WINDOWS
@@ -492,7 +492,7 @@ static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
     return _InterlockedDecrement(val) == 0;
 }
 
-static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) {
+static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
     // Per https://learn.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access
     // > Simple reads and writes to properly-aligned 32-bit variables are atomic
     // > operations. In other words, you will not end up with only one portion
@@ -513,7 +513,7 @@ static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
     return val == 0;
 }
 
-static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) {
+static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
     return *val;
 }
 #else
diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h
index fdf13823a..6bfcd859a 100644
--- a/include/roaring/roaring.h
+++ b/include/roaring/roaring.h
@@ -840,6 +840,16 @@ uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r);
 void roaring_bitmap_statistics(const roaring_bitmap_t *r,
                                roaring_statistics_t *stat);
 
+/**
+ * Perform internal consistency checks. Returns true if the bitmap is consistent.
+ *
+ * Note that some operations intentionally leave bitmaps in an inconsistent state temporarily,
+ * for example, `roaring_bitmap_lazy_*` functions, until `roaring_bitmap_repair_after_lazy` is called.
+ *
+ * If reason is non-null, it will be set to a string describing the first inconsistency found if any.
+ */
+bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, const char **reason);
+
 /*********************
 * What follows is code use to iterate through values in a roaring bitmap
 
diff --git a/src/containers/array.c b/src/containers/array.c
index 6c0ffd2c7..e74b4f62e 100644
--- a/src/containers/array.c
+++ b/src/containers/array.c
@@ -444,6 +444,46 @@ void array_container_printf_as_uint32_array(const array_container_t *v,
     }
 }
 
+/*
+ * Validate the container. Returns true if valid.
+ */
+bool array_container_validate(const array_container_t *v, const char **reason) {
+    if (v->capacity < 0) {
+        *reason = "negative capacity";
+        return false;
+    }
+    if (v->cardinality < 0) {
+        *reason = "negative cardinality";
+        return false;
+    }
+    if (v->cardinality > v->capacity) {
+        *reason = "cardinality exceeds capacity";
+        return false;
+    }
+    if (v->cardinality > DEFAULT_MAX_SIZE) {
+        *reason = "cardinality exceeds DEFAULT_MAX_SIZE";
+        return false;
+    }
+    if (v->cardinality == 0) {
+        return true;
+    }
+
+    if (v->array == NULL) {
+        *reason = "NULL array pointer";
+        return false;
+    }
+    uint16_t prev = v->array[0];
+    for (int i = 1; i < v->cardinality; ++i) {
+        if (v->array[i] <= prev) {
+            *reason = "array elements not strictly increasing";
+            return false;
+        }
+        prev = v->array[i];
+    }
+
+    return true;
+}
+
 /* Compute the number of runs */
 int32_t array_container_number_of_runs(const array_container_t *ac) {
     // Can SIMD work here?
diff --git a/src/containers/bitset.c b/src/containers/bitset.c
index 456d807ce..722eda1e3 100644
--- a/src/containers/bitset.c
+++ b/src/containers/bitset.c
@@ -1000,6 +1000,26 @@ void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint3
 	}
 }
 
+/*
+ * Validate the container. Returns true if valid.
+ */
+bool bitset_container_validate(const bitset_container_t *v, const char **reason) {
+    if (v->words == NULL) {
+        *reason = "words is NULL";
+        return false;
+    }
+    if (v->cardinality != bitset_container_compute_cardinality(v)) {
+        *reason = "cardinality is incorrect";
+        return false;
+    }
+    // Attempt to forcibly load the first and last words, hopefully causing
+    // a segfault or an address sanitizer error if words is not allocated.
+    volatile uint64_t *words = v->words;
+    (void) words[0];
+    (void) words[BITSET_CONTAINER_SIZE_IN_WORDS - 1];
+    return true;
+}
+
 
 // TODO: use the fast lower bound, also
 int bitset_container_number_of_runs(bitset_container_t *bc) {
diff --git a/src/containers/containers.c b/src/containers/containers.c
index 9bbd758c8..78a72db58 100644
--- a/src/containers/containers.c
+++ b/src/containers/containers.c
@@ -95,6 +95,43 @@ void container_printf_as_uint32_array(
     }
 }
 
+bool container_internal_validate(const container_t *container,
+                                 uint8_t typecode, const char **reason) {
+    if (container == NULL) {
+        *reason = "container is NULL";
+        return false;
+    }
+    // Not using container_unwrap_shared because it asserts if shared containers are nested
+    if (typecode == SHARED_CONTAINER_TYPE) {
+        const shared_container_t *shared_container = const_CAST_shared(container);
+        if (croaring_refcount_get(&shared_container->counter) == 0) {
+            *reason = "shared container has zero refcount";
+            return false;
+        }
+        if (shared_container->typecode == SHARED_CONTAINER_TYPE) {
+            *reason = "shared container is nested";
+            return false;
+        }
+        if (shared_container->container == NULL) {
+            *reason = "shared container has NULL container";
+            return false;
+        }
+        container = shared_container->container;
+        typecode = shared_container->typecode;
+    }
+    switch (typecode) {
+        case BITSET_CONTAINER_TYPE:
+            return bitset_container_validate(const_CAST_bitset(container), reason);
+        case ARRAY_CONTAINER_TYPE:
+            return array_container_validate(const_CAST_array(container), reason);
+        case RUN_CONTAINER_TYPE:
+            return run_container_validate(const_CAST_run(container), reason);
+        default:
+            *reason = "invalid typecode";
+            return false;
+    }
+}
+
 extern inline bool container_nonzero_cardinality(
         const container_t *c, uint8_t typecode);
 
diff --git a/src/containers/run.c b/src/containers/run.c
index 31203a64d..248432cb3 100644
--- a/src/containers/run.c
+++ b/src/containers/run.c
@@ -680,6 +680,54 @@ void run_container_printf_as_uint32_array(const run_container_t *cont,
     }
 }
 
+/*
+ * Validate the container. Returns true if valid.
+ */
+bool run_container_validate(const run_container_t *run, const char **reason) {
+    if (run->n_runs < 0) {
+        *reason = "negative run count";
+        return false;
+    }
+    if (run->capacity < 0) {
+        *reason = "negative run capacity";
+        return false;
+    }
+    if (run->capacity < run->n_runs) {
+        *reason = "capacity less than run count";
+        return false;
+    }
+
+    if (run->n_runs == 0) {
+        return true;
+    }
+    if (run->runs == NULL) {
+        *reason = "NULL runs";
+        return false;
+    }
+
+    // Use uint32_t to avoid overflow issues on ranges that contain UINT16_MAX.
+    uint32_t last_end = 0;
+    for (int i = 0; i < run->n_runs; ++i) {
+        uint32_t start = run->runs[i].value;
+        uint32_t end = start + run->runs[i].length + 1;
+        if (end <= start) {
+            *reason = "run start + length overflow";
+            return false;
+        }
+
+        if (start < last_end) {
+            *reason = "run start less than last end";
+            return false;
+        }
+        if (start == last_end && last_end != 0) {
+            *reason = "run start equal to last end, should have combined";
+            return false;
+        }
+        last_end = end;
+    }
+    return true;
+}
+
 int32_t run_container_write(const run_container_t *container, char *buf) {
     uint16_t cast_16 = container->n_runs;
     memcpy(buf, &cast_16, sizeof(uint16_t));
diff --git a/src/roaring.c b/src/roaring.c
index 1d8c5161d..843d0ae98 100644
--- a/src/roaring.c
+++ b/src/roaring.c
@@ -425,6 +425,76 @@ void roaring_bitmap_statistics(const roaring_bitmap_t *r,
     }
 }
 
+/*
+ * Checks that:
+ * - Array containers are sorted and contain no duplicates
+ * - Range containers are sorted and contain no overlapping ranges
+ * - Roaring containers are sorted by key and there are no duplicate keys
+ * - The correct container type is use for each container (e.g. bitmaps aren't used for small containers)
+ */
+bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, const char **reason) {
+    const char *reason_local;
+    if (reason == NULL) {
+        // Always allow assigning through *reason
+        reason = &reason_local;
+    }
+    *reason = NULL;
+    const roaring_array_t *ra = &r->high_low_container;
+    if (ra->size < 0) {
+        *reason = "negative size";
+        return false;
+    }
+    if (ra->allocation_size < 0) {
+        *reason = "negative allocation size";
+        return false;
+    }
+    if (ra->size > ra->allocation_size) {
+        *reason = "more containers than allocated space";
+        return false;
+    }
+    if (ra->flags & ~(ROARING_FLAG_COW | ROARING_FLAG_FROZEN)) {
+        *reason = "invalid flags";
+        return false;
+    }
+    if (ra->size == 0) {
+        return true;
+    }
+
+    if (ra->keys == NULL) {
+        *reason = "keys is NULL";
+        return false;
+    }
+    if (ra->typecodes == NULL) {
+        *reason = "typecodes is NULL";
+        return false;
+    }
+    if (ra->containers == NULL) {
+        *reason = "containers is NULL";
+        return false;
+    }
+
+    uint32_t prev_key = ra->keys[0];
+    for (int32_t i = 1; i < ra->size; ++i) {
+        if (ra->keys[i] <= prev_key) {
+            *reason = "keys not strictly increasing";
+            return false;
+        }
+        prev_key = ra->keys[i];
+    }
+
+    for (int32_t i = 0; i < ra->size; ++i) {
+        if (!container_internal_validate(ra->containers[i], ra->typecodes[i], reason)) {
+            // reason should already be set
+            if (*reason == NULL) {
+                *reason = "container failed to validate but no reason given";
+            }
+            return false;
+        }
+    }
+
+    return true;
+}
+
 roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) {
     roaring_bitmap_t *ans =
         (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
diff --git a/tests/test.h b/tests/test.h
index f4d1fe4b0..a71d94c4e 100644
--- a/tests/test.h
+++ b/tests/test.h
@@ -27,6 +27,12 @@
 
 #define DESCRIBE_TEST fprintf(stderr, "--- %s\n", __func__)
 
+#define assert_bitmap_validate(b) do {                                       \
+        const char *internal_reason_buf = NULL;                              \
+        if (!roaring_bitmap_internal_validate((b), &internal_reason_buf)) {  \
+            fail_msg("internal validation failed: %s", internal_reason_buf); \
+        }                                                                    \
+    } while (0)
 
 // The "cmocka" test functions are supposed to look like:
 //
diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index 274a20bbb..4d50fc51d 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -692,11 +692,13 @@ void test_example(bool copy_on_write) {
     // create a new empty bitmap
     roaring_bitmap_t *r1 = roaring_bitmap_create();
     roaring_bitmap_set_copy_on_write(r1, copy_on_write);
+    assert_bitmap_validate(r1);
     assert_non_null(r1);
 
     // then we can add values
     for (uint32_t i = 100; i < 1000; i++) {
         roaring_bitmap_add(r1, i);
+        assert_bitmap_validate(r1);
     }
 
     // check whether a value is contained
@@ -710,6 +712,7 @@ void test_example(bool copy_on_write) {
     // run_optimize
     uint32_t size = roaring_bitmap_portable_size_in_bytes(r1);
     roaring_bitmap_run_optimize(r1);
+    assert_bitmap_validate(r1);
     uint32_t compact_size = roaring_bitmap_portable_size_in_bytes(r1);
 
     printf("size before run optimize %d bytes, and after %d bytes\n", size,
@@ -717,6 +720,7 @@ void test_example(bool copy_on_write) {
 
     // create a new bitmap with varargs
     roaring_bitmap_t *r2 = roaring_bitmap_of(5, 1, 2, 3, 5, 6);
+    assert_bitmap_validate(r2);
     assert_non_null(r2);
 
     roaring_bitmap_printf(r2);
@@ -725,6 +729,7 @@ void test_example(bool copy_on_write) {
     const uint32_t values[] = {2, 3, 4};
     roaring_bitmap_t *r3 = roaring_bitmap_of_ptr(3, values);
     roaring_bitmap_set_copy_on_write(r3, copy_on_write);
+    assert_bitmap_validate(r3);
 
     // we can also go in reverse and go from arrays to bitmaps
     uint64_t card1 = roaring_bitmap_get_cardinality(r1);
@@ -742,6 +747,7 @@ void test_example(bool copy_on_write) {
 
 
     roaring_bitmap_t *r1f = roaring_bitmap_of_ptr(card1, arr1);
+    assert_bitmap_validate(r1f);
     free(arr1);
     assert_non_null(r1f);
 
@@ -752,12 +758,14 @@ void test_example(bool copy_on_write) {
     // we can copy and compare bitmaps
     roaring_bitmap_t *z = roaring_bitmap_copy(r3);
     roaring_bitmap_set_copy_on_write(z, copy_on_write);
+    assert_bitmap_validate(z);
     assert_true(roaring_bitmap_equals(r3, z));
 
     roaring_bitmap_free(z);
 
     // we can compute union two-by-two
     roaring_bitmap_t *r1_2_3 = roaring_bitmap_or(r1, r2);
+    assert_bitmap_validate(r1_2_3);
     assert_true(roaring_bitmap_get_cardinality(r1_2_3) ==
                 roaring_bitmap_or_cardinality(r1, r2));
 
@@ -767,9 +775,11 @@ void test_example(bool copy_on_write) {
     // we can compute a big union
     const roaring_bitmap_t *allmybitmaps[] = {r1, r2, r3};
     roaring_bitmap_t *bigunion = roaring_bitmap_or_many(3, allmybitmaps);
+    assert_bitmap_validate(bigunion);
     assert_true(roaring_bitmap_equals(r1_2_3, bigunion));
     roaring_bitmap_t *bigunionheap =
         roaring_bitmap_or_many_heap(3, allmybitmaps);
+    assert_bitmap_validate(bigunionheap);
     assert_true(roaring_bitmap_equals(r1_2_3, bigunionheap));
     roaring_bitmap_free(r1_2_3);
     roaring_bitmap_free(bigunion);
@@ -778,11 +788,13 @@ void test_example(bool copy_on_write) {
     // we can compute xor two-by-two
     roaring_bitmap_t *rx1_2_3 = roaring_bitmap_xor(r1, r2);
     roaring_bitmap_set_copy_on_write(rx1_2_3, copy_on_write);
+    assert_bitmap_validate(rx1_2_3);
     roaring_bitmap_xor_inplace(rx1_2_3, r3);
 
     // we can compute a big xor
     const roaring_bitmap_t *allmybitmaps_x[] = {r1, r2, r3};
     roaring_bitmap_t *bigxor = roaring_bitmap_xor_many(3, allmybitmaps_x);
+    assert_bitmap_validate(bigxor);
     assert_true(roaring_bitmap_equals(rx1_2_3, bigxor));
 
     roaring_bitmap_free(rx1_2_3);
@@ -790,6 +802,7 @@ void test_example(bool copy_on_write) {
 
     // we can compute intersection two-by-two
     roaring_bitmap_t *i1_2 = roaring_bitmap_and(r1, r2);
+    assert_bitmap_validate(i1_2);
     assert_true(roaring_bitmap_get_cardinality(i1_2) ==
                 roaring_bitmap_and_cardinality(r1, r2));
 
@@ -801,6 +814,7 @@ void test_example(bool copy_on_write) {
     size_t actualsize = roaring_bitmap_portable_serialize(r1, serializedbytes);
     assert_int_equal(actualsize, expectedsize);
     roaring_bitmap_t *t = roaring_bitmap_portable_deserialize(serializedbytes);
+    assert_bitmap_validate(t);
     assert_true(roaring_bitmap_equals(r1, t));
     roaring_bitmap_free(t);
      // we can also check whether there is a bitmap at a memory location without reading it
@@ -808,6 +822,7 @@ void test_example(bool copy_on_write) {
     assert_true(sizeofbitmap == expectedsize);  // sizeofbitmap would be zero if no bitmap were found
     // we can also read the bitmap "safely" by specifying a byte size limit:
     t = roaring_bitmap_portable_deserialize_safe(serializedbytes,expectedsize);
+    assert_bitmap_validate(t);
     assert_true(roaring_bitmap_equals(r1, t));  // what we recover is equal
     roaring_bitmap_free(t);
     free(serializedbytes);
@@ -872,6 +887,7 @@ void test_uint32_iterator(bool run) {
         roaring_bitmap_add(r1, i);
     }
     if(run) roaring_bitmap_run_optimize(r1);
+    assert_bitmap_validate(r1);
     roaring_uint32_iterator_t *iter = roaring_create_iterator(r1);
     for (uint32_t i = 0; i < 66000; i += 3) {
         assert_true(iter->has_value);
@@ -1020,6 +1036,7 @@ DEFINE_TEST(test_addremove) {
     for (uint32_t value = 33057; value < 147849; value += 8) {
         roaring_bitmap_remove(bm, value);
     }
+    assert_bitmap_validate(bm);
     assert_true(roaring_bitmap_is_empty(bm));
     roaring_bitmap_free(bm);
 }
@@ -1033,6 +1050,7 @@ DEFINE_TEST(test_addremove_bulk) {
     for (uint32_t value = 33057; value < 147849; value += 8) {
         assert_true(roaring_bitmap_remove_checked(bm, value));
     }
+    assert_bitmap_validate(bm);
     assert_true(roaring_bitmap_is_empty(bm));
     roaring_bitmap_free(bm);
 }
@@ -1046,6 +1064,7 @@ DEFINE_TEST(test_addremoverun) {
     for (uint32_t value = 33057; value < 147849; value += 8) {
         roaring_bitmap_remove(bm, value);
     }
+    assert_bitmap_validate(bm);
     assert_true(roaring_bitmap_is_empty(bm));
     roaring_bitmap_free(bm);
 }
@@ -1081,6 +1100,8 @@ bool check_bitmap_from_range(uint32_t min, uint64_t max, uint32_t step) {
     for (uint32_t value = min; value < max; value += step) {
         roaring_bitmap_add(expected, value);
     }
+    assert_bitmap_validate(result);
+    assert_bitmap_validate(expected);
     bool is_equal = roaring_bitmap_equals(expected, result);
     if (!is_equal) {
         fprintf(stderr, "[ERROR] check_bitmap_from_range(%u, %u, %u)\n",
@@ -1096,6 +1117,8 @@ DEFINE_TEST(test_silly_range) {
     check_bitmap_from_range(0, 2, 1);
     roaring_bitmap_t *bm1 = roaring_bitmap_from_range(0, 1, 1);
     roaring_bitmap_t *bm2 = roaring_bitmap_from_range(0, 2, 1);
+    assert_bitmap_validate(bm1);
+    assert_bitmap_validate(bm2);
     assert_false(roaring_bitmap_equals(bm1, bm2));
     roaring_bitmap_free(bm1);
     roaring_bitmap_free(bm2);
@@ -1103,6 +1126,7 @@ DEFINE_TEST(test_silly_range) {
 
 DEFINE_TEST(test_adversarial_range) {
     roaring_bitmap_t *bm1 = roaring_bitmap_from_range(0, UINT64_C(0x100000000), 1);
+    assert_bitmap_validate(bm1);
     assert_true(roaring_bitmap_get_cardinality(bm1) == UINT64_C(0x100000000));
     roaring_bitmap_free(bm1);
 }
@@ -1152,6 +1176,7 @@ DEFINE_TEST(test_bitmap_from_range) {
 DEFINE_TEST(test_printf) {
     roaring_bitmap_t *r1 =
         roaring_bitmap_of(8, 1, 2, 3, 100, 1000, 10000, 1000000, 20000000);
+    assert_bitmap_validate(r1);
     assert_non_null(r1);
     roaring_bitmap_printf(r1);
     roaring_bitmap_free(r1);
@@ -1178,6 +1203,7 @@ DEFINE_TEST(test_printf_withrun) {
     for (int i = 100, top_val = 200; i < top_val; i++)
         roaring_bitmap_add(r1, i);
     roaring_bitmap_run_optimize(r1);
+    assert_bitmap_validate(r1);
     roaring_bitmap_printf(r1);  // does it crash?
     roaring_bitmap_free(r1);
     printf("\n");

From aa5377bf1eb4c983ae03500e9c30890717d6eadd Mon Sep 17 00:00:00 2001
From: Jason King <jasonbking@users.noreply.github.com>
Date: Tue, 18 Jul 2023 09:22:17 -0500
Subject: [PATCH 147/162] Define __EXTENSIONS__ on illumos (#498)

---
 include/roaring/portability.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/roaring/portability.h b/include/roaring/portability.h
index 162d49016..fba2a4b25 100644
--- a/include/roaring/portability.h
+++ b/include/roaring/portability.h
@@ -59,6 +59,10 @@
 #define _XOPEN_SOURCE 700
 #endif // !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700)
 
+#ifdef __illumos__
+#define __EXTENSIONS__
+#endif
+
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>  // will provide posix_memalign with _POSIX_C_SOURCE as defined above

From 9eb2a3595fd37cdd74fc2ddbe7170c2eff1b3ca9 Mon Sep 17 00:00:00 2001
From: Zachary Dremann <dremann@gmail.com>
Date: Sat, 5 Aug 2023 11:28:32 -0400
Subject: [PATCH 148/162] Keep capacity sane when creating a bitmap with a
 capacity (#499)

* Keep capacity sane when creating a bitmap with a capacity

The previous code would happily accept and create _huge_ allocations
(up to 22 GiB) for up to 2 billion containers. However, we know the maximum
number of containers possible, so we instead clamp to that value.

This leads to two changes:
- When requesting more than 2^16 containers, we will only allocate space for
  2^16 containers
- When requesting more than 2^31 containers, we will no longer error

* Add a unit test that we allow huge capacities
---
 src/roaring_array.c   | 5 ++++-
 tests/toplevel_unit.c | 8 ++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/roaring_array.c b/src/roaring_array.c
index bff88f39b..b547cf85b 100644
--- a/src/roaring_array.c
+++ b/src/roaring_array.c
@@ -76,7 +76,10 @@ bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) {
     if (!new_ra) return false;
     ra_init(new_ra);
 
-    if (cap > INT32_MAX) { return false; }
+    // Containers hold 64Ki elements, so 64Ki containers is enough to hold `0x10000 * 0x10000` (all 2^32) elements
+    if (cap > 0x10000) {
+        cap = 0x10000;
+    }
 
     if(cap > 0) {
       void *bigalloc = roaring_malloc(cap *
diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index 4d50fc51d..9697e2d28 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -532,6 +532,13 @@ DEFINE_TEST(test_stats) {
     roaring_bitmap_free(r1);
 }
 
+DEFINE_TEST(with_huge_capacity) {
+    roaring_bitmap_t *r = roaring_bitmap_create_with_capacity(UINT32_MAX);
+    assert_non_null(r);
+    assert_int_equal(r->high_low_container.allocation_size, (1 << 16));
+    roaring_bitmap_free(r);
+}
+
 // this should expose memory leaks
 // (https://github.com/RoaringBitmap/CRoaring/pull/70)
 void leaks_with_empty(bool copy_on_write) {
@@ -4623,6 +4630,7 @@ int main() {
         cmocka_unit_test(test_silly_range),
         cmocka_unit_test(test_uint32_iterator_true),
         cmocka_unit_test(test_uint32_iterator_false),
+        cmocka_unit_test(with_huge_capacity),
         cmocka_unit_test(leaks_with_empty_true),
         cmocka_unit_test(leaks_with_empty_false),
         cmocka_unit_test(test_bitmap_from_range),

From 9778794423649fd6b793b942084f5586a3638433 Mon Sep 17 00:00:00 2001
From: Zachary Dremann <dremann@gmail.com>
Date: Mon, 7 Aug 2023 23:09:10 -0400
Subject: [PATCH 149/162] Add more docs to `roaring_bitmap_overwrite` (#500)

Document the return value, and what the state will be on error

Fixes #342
---
 include/roaring/roaring.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h
index 6bfcd859a..b2476e7db 100644
--- a/include/roaring/roaring.h
+++ b/include/roaring/roaring.h
@@ -110,6 +110,9 @@ roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r);
  *
  * It might be preferable and simpler to call roaring_bitmap_copy except
  * that roaring_bitmap_overwrite can save on memory allocations.
+ *
+ * Returns true if successful, or false if there was an error. On failure,
+ * the dest bitmap is left in a valid, empty state (even if it was not empty before).
  */
 bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
                               const roaring_bitmap_t *src);

From 33da04d7cf922c954228488e71b915a66ecff741 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 8 Aug 2023 11:12:32 -0400
Subject: [PATCH 150/162] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index abdfbca56..92e8661b3 100644
--- a/README.md
+++ b/README.md
@@ -716,8 +716,8 @@ different strategies.
 You can use `roaring_bitmap_or_many(bitmapcount, bitmaps)` or `roaring_bitmap_or_many_heap(bitmapcount, bitmaps)` or you may
 even roll your own aggregation:
 
-```
-roaring_bitmap_t *answer  = roaring_bitmap_copy(bitmaps[0]);
+```C
+roaring_bitmap_t *answer = roaring_bitmap_copy(bitmaps[0]);
 for (size_t i = 1; i < bitmapcount; i++) {
   roaring_bitmap_or_inplace(answer, bitmaps[i]);
 }

From 8fc18acd32481810acc7166e3faa98be5e1b5675 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Wed, 6 Sep 2023 10:08:17 -0400
Subject: [PATCH 151/162] Fixing issue 501 (#502)

---
 .github/workflows/macos-ci.yml | 9 +++++++++
 CMakeLists.txt                 | 1 -
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/macos-ci.yml b/.github/workflows/macos-ci.yml
index b1a7067fa..c97c5160f 100644
--- a/.github/workflows/macos-ci.yml
+++ b/.github/workflows/macos-ci.yml
@@ -22,6 +22,15 @@ jobs:
           ctest . --output-on-failure
           cmake --install . 
           cd ../tests/installation/find && mkdir build && cd build && cmake -DCMAKE_INSTALL_PREFIX:PATH=../../../build/destination .. &&  cmake --build .
+      - name: Build and Test (shared)
+        run: |
+          cmake -DBUILD_SHARED_LIBS=ON -B buildshared -DCMAKE_INSTALL_PREFIX:PATH=destinationshared
+          cmake --build buildshared
+          cmake --install buildshared
+          cd tests/installation/find
+          cmake -DCMAKE_INSTALL_PREFIX:PATH=../../../destinationshared -B buildshared
+          cmake --build buildshared
+          ./buildshared/repro
       - name: Build and Test Debug
         run: |
           mkdir builddebug
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0bc46919e..7a07c9d87 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,7 +5,6 @@ project(RoaringBitmap
 )
 include(GNUInstallDirs)
 
-set(CMAKE_MACOSX_RPATH OFF)
 if (NOT CMAKE_BUILD_TYPE)
                 message(STATUS "No build type selected, default to Release")
                 set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)

From f533a3c2d1f6c596448cf192b31dd9d1a362b869 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 19 Sep 2023 18:58:19 -0400
Subject: [PATCH 152/162] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 92e8661b3..6201f00d7 100644
--- a/README.md
+++ b/README.md
@@ -57,7 +57,7 @@ of the latest hardware. Roaring bitmaps are already available on a variety of pl
 
 - Linux, macOS, FreeBSD, Windows (MSYS2 and Microsoft Visual studio).
 - We test the library with ARM, x64/x86 and POWER processors. We only support little endian systems (big endian systems are vanishingly rare).
-- Recent C compiler supporting the C11 standard (GCC 7 or better, LLVM 7.0 or better, Xcode 11 or better), there is also an optional C++ class that requires a C++ compiler supporting the C++11 standard.
+- Recent C compiler supporting the C11 standard (GCC 7 or better, LLVM 7.0 or better, Xcode 11 or better, Microsoft Visual Studio 2022 or better), there is also an optional C++ class that requires a C++ compiler supporting the C++11 standard.
 - CMake (to contribute to the project, users can rely on amalgamation/unity builds if they do not wish to use CMake).
 - Under x64 systems, the library provides runtime dispatch so that optimized functions are called based on the detected CPU features. It works with GCC, clang (version 9 and up) and Visual Studio (2017 and up). Other systems (e.g., ARM) do not need runtime dispatch.
 

From 04340584125ada36351556040cf8be2634e31870 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <dlemire@lemire.me>
Date: Tue, 19 Sep 2023 20:06:45 -0400
Subject: [PATCH 153/162] Allowing users to bypass microbenchmarks.

---
 CMakeLists.txt                 | 7 ++++++-
 microbenchmarks/CMakeLists.txt | 4 ++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 420f4c492..b520e93b4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -101,11 +101,16 @@ configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/tests/config.h.in"
 add_subdirectory(src)
 if(ENABLE_ROARING_TESTS)
   if(CMAKE_SIZEOF_VOID_P EQUAL 8) # we only include the benchmarks on 64-bit systems.
-    add_subdirectory(microbenchmarks)
     add_subdirectory(benchmarks)
   endif()
   add_subdirectory(tests)
 endif()
+option(ENABLE_ROARING_MICROBENCHMARKS "Enable microbenchmarks" OFF)
+if(ENABLE_ROARING_MICROBENCHMARKS)
+    add_subdirectory(microbenchmarks)
+else()
+     MESSAGE( STATUS "You may enable microbenchmarks by setting ENABLE_ROARING_MICROBENCHMARKS to ON " )
+endif()
 # Being terse is good, but knowing how the build is configured is important
 # and should not be hard to figure out.
 MESSAGE( STATUS "CMAKE_SYSTEM_PROCESSOR: " ${CMAKE_SYSTEM_PROCESSOR})
diff --git a/microbenchmarks/CMakeLists.txt b/microbenchmarks/CMakeLists.txt
index ca8862df5..628515bbc 100644
--- a/microbenchmarks/CMakeLists.txt
+++ b/microbenchmarks/CMakeLists.txt
@@ -6,8 +6,8 @@ include(${PROJECT_SOURCE_DIR}/tools/cmake/Import.cmake)
 set_off(BENCHMARK_ENABLE_TESTING)
 set_off(BENCHMARK_ENABLE_INSTALL)
 set_off(BENCHMARK_ENABLE_WERROR)
-
-import_dependency(google_benchmarks google/benchmark f91b6b4)
+set(BENCHMARK_ENABLE_WERROR OFF)
+import_dependency(google_benchmarks google/benchmark 3441176)
 add_dependency(google_benchmarks)
 
 add_executable(bench bench.cpp)

From 7313c9d233f7b6e8463472ac59c580df9b4f3d62 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 25 Sep 2023 14:43:30 -0400
Subject: [PATCH 154/162] Reverting PR 452 (#509)

Co-authored-by: Daniel Lemire <dlemire@lemire.me>
---
 src/containers/mixed_union.c | 36 +++++-------------------------------
 1 file changed, 5 insertions(+), 31 deletions(-)

diff --git a/src/containers/mixed_union.c b/src/containers/mixed_union.c
index 00822e166..0b63c0124 100644
--- a/src/containers/mixed_union.c
+++ b/src/containers/mixed_union.c
@@ -206,37 +206,11 @@ bool array_array_container_inplace_union(
           return false;  // not a bitset
         } else {
           memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t));
-          /*
-            Next line is safe:
-
-            We just need to focus on the reading and writing performed on array1. In `union_vector16`, both vectorized and scalar code still obey the basic rule: read from two inputs, do the union, and then write the output.
-
-            Let's say the length(cardinality) of input2 is L2:
-            ```
-                |<-  L2  ->|
-            array1: [output--- |input 1---|---]
-            array2: [input 2---]
-            ```
-            Let's define 3 __m128i pointers, `pos1` starts from `input1`, `pos2` starts from `input2`, these 2 point at the next byte to read, `out` starts from `output`, pointing at the next byte to overwrite.
-            ```
-            array1: [output--- |input 1---|---]
-                        ^          ^
-                    out        pos1
-            array2: [input 2---]
-                        ^
-                        pos2
-            ```
-            The union output always contains less or equal number of elements than all inputs added, so we have:
-            ```
-            out <= pos1 + pos2
-            ```
-            therefore:
-            ```
-            out <= pos1 + L2
-            ```
-            which means you will not overwrite data beyond pos1, so the data haven't read is safe, and we don't care the data already read.
-          */
-          src_1->cardinality = (int32_t)fast_union_uint16(src_1->array + src_2->cardinality, src_1->cardinality,
+          // In theory, we could use fast_union_uint16, but it is unsafe. It fails
+          // with Intel compilers in particular.
+          // https://github.com/RoaringBitmap/CRoaring/pull/452
+          // See report https://github.com/RoaringBitmap/CRoaring/issues/476
+          src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality,
                                   src_2->array, src_2->cardinality, src_1->array);
           return false; // not a bitset
         }

From c506922a8fad0b7f706d14ea097688ec9e3adfc6 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Mon, 25 Sep 2023 14:44:32 -0400
Subject: [PATCH 155/162] Version bump

---
 CMakeLists.txt                    | 4 ++--
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a9c26868f..8fb34d59c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,8 +17,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 1)
 set(PROJECT_VERSION_MINOR 3)
-set(PROJECT_VERSION_PATCH 0)
-set(ROARING_LIB_VERSION "1.3.0" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 1)
+set(ROARING_LIB_VERSION "1.3.1" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "12" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/doxygen b/doxygen
index a18002df4..171e36c95 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "1.3.0"
+PROJECT_NUMBER         = "1.3.1"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 55b836a18..238d1d980 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "1.3.0"
+#define ROARING_VERSION "1.3.1"
 enum { 
     ROARING_VERSION_MAJOR = 1,
     ROARING_VERSION_MINOR = 3,
-    ROARING_VERSION_REVISION = 0
+    ROARING_VERSION_REVISION = 1
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 0ea17f857b7d2cdd2a0b22bed5f292f5fba860da Mon Sep 17 00:00:00 2001
From: Daniel Lemire <lemire@gmail.com>
Date: Mon, 25 Sep 2023 14:46:35 -0400
Subject: [PATCH 156/162] Version bump

---
 CMakeLists.txt                    | 10 +++++-----
 doxygen                           |  2 +-
 include/roaring/roaring_version.h |  8 ++++----
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8fb34d59c..5df00fcdd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,11 +15,11 @@ if(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSIO
     message(FATAL_ERROR "${PROJECT_NAME} requires at least apple-clang version 11 to support runtime dispatching.")
 endif()
 set(ROARING_LIB_NAME roaring)
-set(PROJECT_VERSION_MAJOR 1)
-set(PROJECT_VERSION_MINOR 3)
-set(PROJECT_VERSION_PATCH 1)
-set(ROARING_LIB_VERSION "1.3.1" CACHE STRING "Roaring library version")
-set(ROARING_LIB_SOVERSION "12" CACHE STRING "Roaring library soversion")
+set(PROJECT_VERSION_MAJOR 2)
+set(PROJECT_VERSION_MINOR 0)
+set(PROJECT_VERSION_PATCH 0)
+set(ROARING_LIB_VERSION "2.0.0" CACHE STRING "Roaring library version")
+set(ROARING_LIB_SOVERSION "13" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
 if(NOT ROARING_EXCEPTIONS)
diff --git a/doxygen b/doxygen
index 171e36c95..7adc253f4 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "1.3.1"
+PROJECT_NUMBER         = "2.0.0"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 238d1d980..95d997074 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "1.3.1"
+#define ROARING_VERSION "2.0.0"
 enum { 
-    ROARING_VERSION_MAJOR = 1,
-    ROARING_VERSION_MINOR = 3,
-    ROARING_VERSION_REVISION = 1
+    ROARING_VERSION_MAJOR = 2,
+    ROARING_VERSION_MINOR = 0,
+    ROARING_VERSION_REVISION = 0
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From a103d3811702b9389c538881c9974e9a7a7552af Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Mon, 25 Sep 2023 15:18:24 -0400
Subject: [PATCH 157/162] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 6201f00d7..3839fef46 100644
--- a/README.md
+++ b/README.md
@@ -57,7 +57,7 @@ of the latest hardware. Roaring bitmaps are already available on a variety of pl
 
 - Linux, macOS, FreeBSD, Windows (MSYS2 and Microsoft Visual studio).
 - We test the library with ARM, x64/x86 and POWER processors. We only support little endian systems (big endian systems are vanishingly rare).
-- Recent C compiler supporting the C11 standard (GCC 7 or better, LLVM 7.0 or better, Xcode 11 or better, Microsoft Visual Studio 2022 or better), there is also an optional C++ class that requires a C++ compiler supporting the C++11 standard.
+- Recent C compiler supporting the C11 standard (GCC 7 or better, LLVM 7.0 or better, Xcode 11 or better, Microsoft Visual Studio 2022 or better, Intel oneAPI Compiler 2023.2 or better), there is also an optional C++ class that requires a C++ compiler supporting the C++11 standard.
 - CMake (to contribute to the project, users can rely on amalgamation/unity builds if they do not wish to use CMake).
 - Under x64 systems, the library provides runtime dispatch so that optimized functions are called based on the detected CPU features. It works with GCC, clang (version 9 and up) and Visual Studio (2017 and up). Other systems (e.g., ARM) do not need runtime dispatch.
 

From 8dc388105b5520eb122f806cc25425d8a923bdf1 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 26 Sep 2023 16:15:50 -0400
Subject: [PATCH 158/162] Improves the documentation of
 roaring_bitmap_internal_validate (#510)

---
 README.md              |   21 +-
 fuzz/croaring_fuzzer.c |   26 +-
 src/containers/array.c |    6 +-
 src/containers/run.c   |    6 +-
 src/roaring.c          |    5 +-
 src/roaring_array.c    |   45 +-
 tests/c_example1.c     |   19 +-
 tests/toplevel_unit.c  | 1045 ++++++++++++++++++++--------------------
 8 files changed, 620 insertions(+), 553 deletions(-)

diff --git a/README.md b/README.md
index 3839fef46..4f88b185e 100644
--- a/README.md
+++ b/README.md
@@ -325,12 +325,15 @@ int main() {
     uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
     char *serializedbytes = malloc(expectedsize);
     roaring_bitmap_portable_serialize(r1, serializedbytes);
-    // For additional safety, you may replace roaring_bitmap_portable_deserialize by
-    // roaring_bitmap_portable_deserialize_safe.
     // Note: it is expected that the input follows the specification
     // https://github.com/RoaringBitmap/RoaringFormatSpec
     // otherwise the result may be unusable.
-    roaring_bitmap_t *t = roaring_bitmap_portable_deserialize(serializedbytes);
+    roaring_bitmap_t *t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
+    if(t == NULL) { return EXIT_FAILURE; }
+    const char *reason = NULL;
+    if (!roaring_bitmap_internal_validate(t, &reason)) {
+        return EXIT_FAILURE;
+    }
     assert(roaring_bitmap_equals(r1, t));  // what we recover is equal
     roaring_bitmap_free(t);
     // we can also check whether there is a bitmap at a memory location without
@@ -341,6 +344,18 @@ int main() {
            expectedsize);  // sizeofbitmap would be zero if no bitmap were found
     // we can also read the bitmap "safely" by specifying a byte size limit:
     t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
+    if(t == NULL) {
+        printf("Problem during deserialization.\n");
+        // We could clear any memory and close any file here.
+        return EXIT_FAILURE;
+    }
+    // We can validate the bitmap we recovered to make sure it is proper.
+    const char *reason_failure = NULL;
+    if (!roaring_bitmap_internal_validate(t, &reason_failure)) {
+        printf("safely deserialized invalid bitmap: %s\n", reason_failure);
+        // We could clear any memory and close any file here.
+        return EXIT_FAILURE;
+    }
     // It is still necessary for the content of seriallizedbytes to follow
     // the standard: https://github.com/RoaringBitmap/RoaringFormatSpec
     // This is guaranted when calling 'roaring_bitmap_portable_deserialize'.
diff --git a/fuzz/croaring_fuzzer.c b/fuzz/croaring_fuzzer.c
index 27f7aa3b5..10a5391a5 100644
--- a/fuzz/croaring_fuzzer.c
+++ b/fuzz/croaring_fuzzer.c
@@ -15,15 +15,35 @@
 ////////////////////////////////////////////////////////////////////////////////
 
 #include <stdint.h>
-#include <string.h>
+#include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
+
 #include "roaring/roaring.h"
 
 int LLVMFuzzerTestOneInput(const char *data, size_t size) {
     // We test that deserialization never fails.
-    roaring_bitmap_t* bitmap = roaring_bitmap_portable_deserialize_safe(data, size);
-    if(bitmap) {
+    roaring_bitmap_t *bitmap =
+        roaring_bitmap_portable_deserialize_safe(data, size);
+    if (bitmap) {
         // The bitmap may not be usable if it does not follow the specification.
+        // We can validate the bitmap we recovered to make sure it is proper.
+        const char *reason_failure = NULL;
+        if (roaring_bitmap_internal_validate(bitmap, &reason_failure)) {
+            // Valid bitmap: each value added must raise the cardinality by one.
+            uint32_t cardinality = roaring_bitmap_get_cardinality(bitmap);
+            for (uint32_t i = 100; i < 1000; i++) {
+                if (!roaring_bitmap_contains(bitmap, i)) {
+                    cardinality++;
+                    roaring_bitmap_add(bitmap, i);
+                }
+            }
+            uint32_t new_cardinality = roaring_bitmap_get_cardinality(bitmap);
+            if (cardinality != new_cardinality) {
+                printf("bug\n");
+                exit(1);
+            }
+        }
         roaring_bitmap_free(bitmap);
     }
     return 0;
diff --git a/src/containers/array.c b/src/containers/array.c
index e74b4f62e..0816460ba 100644
--- a/src/containers/array.c
+++ b/src/containers/array.c
@@ -172,11 +172,7 @@ void array_container_grow(array_container_t *container, int32_t min,
         container->array = (uint16_t *)roaring_malloc(new_capacity * sizeof(uint16_t));
     }
 
-    //  handle the case where realloc fails
-    if (container->array == NULL) {
-      fprintf(stderr, "could not allocate memory\n");
-    }
-    assert(container->array != NULL);
+    // if realloc fails, we have container->array == NULL.
 }
 
 /* Copy one container into another. We assume that they are distinct. */
diff --git a/src/containers/run.c b/src/containers/run.c
index 248432cb3..cbcd040d3 100644
--- a/src/containers/run.c
+++ b/src/containers/run.c
@@ -208,11 +208,7 @@ void run_container_grow(run_container_t *run, int32_t min, bool copy) {
         }
         run->runs = (rle16_t *)roaring_malloc(run->capacity * sizeof(rle16_t));
     }
-    // handle the case where realloc fails
-    if (run->runs == NULL) {
-      fprintf(stderr, "could not allocate memory\n");
-    }
-    assert(run->runs != NULL);
+    // We may have run->runs == NULL.
 }
 
 /* copy one container into another */
diff --git a/src/roaring.c b/src/roaring.c
index 843d0ae98..c863aa531 100644
--- a/src/roaring.c
+++ b/src/roaring.c
@@ -1505,7 +1505,10 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size
     }
     size_t bytesread;
     bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread);
-    if(is_ok) assert(bytesread <= maxbytes);
+    if (!is_ok) {
+        roaring_free(ans);
+        return NULL;
+    }
     roaring_bitmap_set_copy_on_write(ans, false);
     if (!is_ok) {
         roaring_free(ans);
diff --git a/src/roaring_array.c b/src/roaring_array.c
index b547cf85b..7924aaeda 100644
--- a/src/roaring_array.c
+++ b/src/roaring_array.c
@@ -635,7 +635,7 @@ size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) {
         buf += sizeof(uint32_t);
     }
     if (size > (1<<16)) {
-       return 0; // logically impossible
+       return 0;
     }
     char *bitmapOfRunContainers = NULL;
     bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
@@ -694,15 +694,15 @@ size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) {
     return bytestotal;
 }
 
-// this function populates answer from the content of buf (reading up to maxbytes bytes).
+// This function populates answer from the content of buf (reading up to maxbytes bytes).
 // The function returns false if a properly serialized bitmap cannot be found.
-// if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes.
+// If it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes.
 //
 // This function is endian-sensitive.
 bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) {
     *readbytes = sizeof(int32_t);// for cookie
     if(*readbytes > maxbytes) {
-      fprintf(stderr, "Ran out of bytes while reading first 4 bytes.\n");
+      // Ran out of bytes while reading first 4 bytes.
       return false;
     }
     uint32_t cookie;
@@ -710,8 +710,7 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
     buf += sizeof(uint32_t);
     if ((cookie & 0xFFFF) != SERIAL_COOKIE &&
         cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
-        fprintf(stderr, "I failed to find one of the right cookies. Found %" PRIu32 "\n",
-                cookie);
+        // Failed to find one of the right cookies.
         return false;
     }
     int32_t size;
@@ -721,21 +720,19 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
     else {
         *readbytes += sizeof(int32_t);
         if(*readbytes > maxbytes) {
-          fprintf(stderr, "Ran out of bytes while reading second part of the cookie.\n");
+          // Ran out of bytes while reading second part of the cookie.
           return false;
         }
         memcpy(&size, buf, sizeof(int32_t));
         buf += sizeof(uint32_t);
     }
     if (size < 0) {
-       fprintf(stderr, "You cannot have a negative number of containers, the data must be corrupted: %" PRId32 "\n",
-                size);
-       return false; // logically impossible
+       // You cannot have a negative number of containers, the data must be corrupted.
+       return false;
     }
     if (size > (1<<16)) {
-       fprintf(stderr, "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n",
-                size);
-       return false; // logically impossible
+       // You cannot have so many containers, the data must be corrupted.
+       return false;
     }
     const char *bitmapOfRunContainers = NULL;
     bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
@@ -743,7 +740,7 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
         int32_t s = (size + 7) / 8;
         *readbytes += s;
         if(*readbytes > maxbytes) {// data is corrupted?
-          fprintf(stderr, "Ran out of bytes while reading run bitmap.\n");
+          // Ran out of bytes while reading run bitmap.
           return false;
         }
         bitmapOfRunContainers = buf;
@@ -753,14 +750,14 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
 
     *readbytes += size * 2 * sizeof(uint16_t);
     if(*readbytes > maxbytes) {
-      fprintf(stderr, "Ran out of bytes while reading key-cardinality array.\n");
+      // Ran out of bytes while reading key-cardinality array.
       return false;
     }
     buf += size * 2 * sizeof(uint16_t);
 
     bool is_ok = ra_init_with_capacity(answer, size);
     if (!is_ok) {
-        fprintf(stderr, "Failed to allocate memory for roaring array. Bailing out.\n");
+        // Failed to allocate memory for roaring array. Bailing out.
         return false;
     }
 
@@ -772,7 +769,7 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
     if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) {
         *readbytes += size * 4;
         if(*readbytes > maxbytes) {// data is corrupted?
-          fprintf(stderr, "Ran out of bytes while reading offsets.\n");
+          // Ran out of bytes while reading offsets.
           ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
           return false;
         }
@@ -798,14 +795,14 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
             size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
             *readbytes += containersize;
             if(*readbytes > maxbytes) {
-              fprintf(stderr, "Running out of bytes while reading a bitset container.\n");
+              // Running out of bytes while reading a bitset container.
               ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
               return false;
             }
             // it is now safe to read
             bitset_container_t *c = bitset_container_create();
             if(c == NULL) {// memory allocation failure
-              fprintf(stderr, "Failed to allocate memory for a bitset container.\n");
+              // Failed to allocate memory for a bitset container.
               ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
               return false;
             }
@@ -817,7 +814,7 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
             // we check that the read is allowed
             *readbytes += sizeof(uint16_t);
             if(*readbytes > maxbytes) {
-              fprintf(stderr, "Running out of bytes while reading a run container (header).\n");
+              // Running out of bytes while reading a run container (header).
               ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
               return false;
             }
@@ -826,7 +823,7 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
             size_t containersize = n_runs * sizeof(rle16_t);
             *readbytes += containersize;
             if(*readbytes > maxbytes) {// data is corrupted?
-              fprintf(stderr, "Running out of bytes while reading a run container.\n");
+              // Running out of bytes while reading a run container.
               ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
               return false;
             }
@@ -834,7 +831,7 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
 
             run_container_t *c = run_container_create();
             if(c == NULL) {// memory allocation failure
-              fprintf(stderr, "Failed to allocate memory for a run container.\n");
+              // Failed to allocate memory for a run container.
               ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
               return false;
             }
@@ -847,7 +844,7 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
             size_t containersize = thiscard * sizeof(uint16_t);
             *readbytes += containersize;
             if(*readbytes > maxbytes) {// data is corrupted?
-              fprintf(stderr, "Running out of bytes while reading an array container.\n");
+              // Running out of bytes while reading an array container.
               ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
               return false;
             }
@@ -855,7 +852,7 @@ bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const siz
             array_container_t *c =
                 array_container_create_given_capacity(thiscard);
             if(c == NULL) {// memory allocation failure
-              fprintf(stderr, "Failed to allocate memory for an array container.\n");
+              // Failed to allocate memory for an array container.
               ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
               return false;
             }
diff --git a/tests/c_example1.c b/tests/c_example1.c
index 7e92065f2..dfc594061 100644
--- a/tests/c_example1.c
+++ b/tests/c_example1.c
@@ -86,7 +86,12 @@ int main() {
     uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
     char *serializedbytes = (char*)malloc(expectedsize);
     roaring_bitmap_portable_serialize(r1, serializedbytes);
-    roaring_bitmap_t *t = roaring_bitmap_portable_deserialize(serializedbytes);
+    roaring_bitmap_t *t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
+    if(t == NULL) { return EXIT_FAILURE; }
+    const char *reason = NULL;
+    if (!roaring_bitmap_internal_validate(t, &reason)) {
+        return EXIT_FAILURE;
+    }
     assert_true(roaring_bitmap_equals(r1, t));  // what we recover is equal
     roaring_bitmap_free(t);
     // we can also check whether there is a bitmap at a memory location without
@@ -98,6 +103,18 @@ int main() {
            expectedsize);  // sizeofbitmap would be zero if no bitmap were found
     // we can also read the bitmap "safely" by specifying a byte size limit:
     t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
+    if(t == NULL) {
+        printf("Problem during deserialization.\n");
+        // We could clear any memory and close any file here.
+        return EXIT_FAILURE;
+    }
+    // We can validate the bitmap we recovered to make sure it is proper.
+    const char *reason_failure = NULL;
+    if (!roaring_bitmap_internal_validate(t, &reason_failure)) {
+        printf("safely deserialized invalid bitmap: %s\n", reason_failure);
+        // We could clear any memory and close any file here.
+        return EXIT_FAILURE;
+    }
     assert_true(roaring_bitmap_equals(r1, t));  // what we recover is equal
     roaring_bitmap_free(t);
 
diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index 9697e2d28..532ee0721 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -1,27 +1,26 @@
 #include <assert.h>
+#include <roaring/misc/configreport.h>
+#include <roaring/roaring.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
 
-#include <roaring/roaring.h>
-#include <roaring/misc/configreport.h>
-
 // include internal headers for invasive testing
 #include <roaring/containers/containers.h>
 #include <roaring/roaring_array.h>
 
 #ifdef __cplusplus  // stronger type checking errors if C built in C++ mode
-    using namespace roaring::internal;
+using namespace roaring::internal;
 #endif
 
 #include "test.h"
 
-
 static unsigned int seed = 123456789;
 static const int OUR_RAND_MAX = (1 << 30) - 1;
-inline static unsigned int our_rand() {  // we do not want to depend on a system-specific
-                                // random number generator
+inline static unsigned int
+our_rand() {  // we do not want to depend on a system-specific
+              // random number generator
     seed = (1103515245 * seed + 12345);
     return seed & OUR_RAND_MAX;
 }
@@ -47,94 +46,92 @@ bool roaring_iterator_sumall(uint32_t value, void *param) {
     return true;  // continue till the end
 }
 DEFINE_TEST(issue457) {
-  roaring_bitmap_t *r1 = roaring_bitmap_from_range(65539, 65541, 1);
-  roaring_bitmap_printf_describe(r1);
-  assert_true(roaring_bitmap_get_cardinality(r1) == 2);
-  roaring_bitmap_t *r2 = roaring_bitmap_add_offset(r1, -3);
-  roaring_bitmap_printf_describe(r2);
-  assert_true(roaring_bitmap_get_cardinality(r2) == 2);
-  roaring_bitmap_printf(r2);
-  roaring_bitmap_free(r1);
-  roaring_bitmap_free(r2);
+    roaring_bitmap_t *r1 = roaring_bitmap_from_range(65539, 65541, 1);
+    roaring_bitmap_printf_describe(r1);
+    assert_true(roaring_bitmap_get_cardinality(r1) == 2);
+    roaring_bitmap_t *r2 = roaring_bitmap_add_offset(r1, -3);
+    roaring_bitmap_printf_describe(r2);
+    assert_true(roaring_bitmap_get_cardinality(r2) == 2);
+    roaring_bitmap_printf(r2);
+    roaring_bitmap_free(r1);
+    roaring_bitmap_free(r2);
 }
 
 DEFINE_TEST(issue429) {
-  // This is a memory leak test, so we don't need to check the results.
-  roaring_bitmap_t *b1 = roaring_bitmap_create();
-  roaring_bitmap_add_range(b1, 0, 100);
-  roaring_bitmap_remove_range(b1, 0, 99);
-  roaring_bitmap_t *b2 = roaring_bitmap_copy(b1);
-  const roaring_bitmap_t *bitmaps[] = {b1, b2};
-  roaring_bitmap_t *result = roaring_bitmap_or_many_heap(2, bitmaps);
-  roaring_bitmap_free(result);
-  roaring_bitmap_free(b2);
-  roaring_bitmap_free(b1);
+    // This is a memory leak test, so we don't need to check the results.
+    roaring_bitmap_t *b1 = roaring_bitmap_create();
+    roaring_bitmap_add_range(b1, 0, 100);
+    roaring_bitmap_remove_range(b1, 0, 99);
+    roaring_bitmap_t *b2 = roaring_bitmap_copy(b1);
+    const roaring_bitmap_t *bitmaps[] = {b1, b2};
+    roaring_bitmap_t *result = roaring_bitmap_or_many_heap(2, bitmaps);
+    roaring_bitmap_free(result);
+    roaring_bitmap_free(b2);
+    roaring_bitmap_free(b1);
 }
 
-
 DEFINE_TEST(issue431) {
-  // This is a memory access test, so we don't need to check the results.
-  roaring_bitmap_t *b1 = roaring_bitmap_create();
-  roaring_bitmap_add(b1, 100);
-  roaring_bitmap_flip_inplace(b1, 0, 100 + 1);
-  roaring_bitmap_t *b2 = roaring_bitmap_create();
-  roaring_bitmap_add_range(b2, 50, 100 + 1);
-  roaring_bitmap_is_subset(b2, b1);
-  roaring_bitmap_free(b2);
-  roaring_bitmap_free(b1);
+    // This is a memory access test, so we don't need to check the results.
+    roaring_bitmap_t *b1 = roaring_bitmap_create();
+    roaring_bitmap_add(b1, 100);
+    roaring_bitmap_flip_inplace(b1, 0, 100 + 1);
+    roaring_bitmap_t *b2 = roaring_bitmap_create();
+    roaring_bitmap_add_range(b2, 50, 100 + 1);
+    roaring_bitmap_is_subset(b2, b1);
+    roaring_bitmap_free(b2);
+    roaring_bitmap_free(b1);
 }
 
 DEFINE_TEST(issue433) {
-  roaring_bitmap_t *b1 = roaring_bitmap_create();
-  roaring_bitmap_add(b1, 262143);
-  roaring_bitmap_add_range_closed(b1, 258047, 262143);
-  roaring_bitmap_remove_range_closed(b1, 262143, 262143);
-  size_t len = roaring_bitmap_portable_size_in_bytes(b1);
-  char *data = roaring_malloc(len);
-  roaring_bitmap_portable_serialize(b1, data);
-  roaring_bitmap_t *b2 = roaring_bitmap_portable_deserialize_safe(data, len);
-  assert_true(roaring_bitmap_equals(b1, b2));
-  roaring_bitmap_free(b2);
-  roaring_bitmap_free(b1);
-  roaring_free(data);
+    roaring_bitmap_t *b1 = roaring_bitmap_create();
+    roaring_bitmap_add(b1, 262143);
+    roaring_bitmap_add_range_closed(b1, 258047, 262143);
+    roaring_bitmap_remove_range_closed(b1, 262143, 262143);
+    size_t len = roaring_bitmap_portable_size_in_bytes(b1);
+    char *data = roaring_malloc(len);
+    roaring_bitmap_portable_serialize(b1, data);
+    roaring_bitmap_t *b2 = roaring_bitmap_portable_deserialize_safe(data, len);
+    assert_true(roaring_bitmap_equals(b1, b2));
+    roaring_bitmap_free(b2);
+    roaring_bitmap_free(b1);
+    roaring_free(data);
 }
 
-
-
 DEFINE_TEST(issue436) {
-  roaring_bitmap_t *b1 = roaring_bitmap_create();
-  roaring_bitmap_add_range_closed(b1, 19711, 262068);
-  for (int i = 0; i < 0x10000; i += 2) {
-    roaring_bitmap_add(b1, i);
-  }
-  roaring_bitmap_printf_describe(b1);
-  roaring_bitmap_remove_range_closed(b1, 6143, 65505);
-  size_t len = roaring_bitmap_portable_size_in_bytes(b1);
-  char *data = roaring_malloc(len);
-  roaring_bitmap_portable_serialize(b1, data);
-  roaring_bitmap_t *b2 = roaring_bitmap_portable_deserialize_safe(data, len);
-  assert_true(roaring_bitmap_equals(b1, b2));
-  roaring_bitmap_free(b2);
-  roaring_bitmap_free(b1);
-  roaring_free(data);
+    roaring_bitmap_t *b1 = roaring_bitmap_create();
+    roaring_bitmap_add_range_closed(b1, 19711, 262068);
+    for (int i = 0; i < 0x10000; i += 2) {
+        roaring_bitmap_add(b1, i);
+    }
+    roaring_bitmap_printf_describe(b1);
+    roaring_bitmap_remove_range_closed(b1, 6143, 65505);
+    size_t len = roaring_bitmap_portable_size_in_bytes(b1);
+    char *data = roaring_malloc(len);
+    roaring_bitmap_portable_serialize(b1, data);
+    roaring_bitmap_t *b2 = roaring_bitmap_portable_deserialize_safe(data, len);
+    assert_true(roaring_bitmap_equals(b1, b2));
+    roaring_bitmap_free(b2);
+    roaring_bitmap_free(b1);
+    roaring_free(data);
 }
 
 DEFINE_TEST(issue440) {
-  roaring_bitmap_t *b1 = roaring_bitmap_create();
-  roaring_bitmap_add_range_closed(b1, 0x20000, 0x2FFFF);
-  roaring_bitmap_add_range_closed(b1, 0, 0xFFFF);
-  uint32_t largest_item = 0x11000;
-  assert_false(roaring_bitmap_contains_range(b1, 0, largest_item + 1));
-  assert_false(roaring_bitmap_contains(b1, largest_item));
-  roaring_bitmap_free(b1);
+    roaring_bitmap_t *b1 = roaring_bitmap_create();
+    roaring_bitmap_add_range_closed(b1, 0x20000, 0x2FFFF);
+    roaring_bitmap_add_range_closed(b1, 0, 0xFFFF);
+    uint32_t largest_item = 0x11000;
+    assert_false(roaring_bitmap_contains_range(b1, 0, largest_item + 1));
+    assert_false(roaring_bitmap_contains(b1, largest_item));
+    roaring_bitmap_free(b1);
 }
 
 DEFINE_TEST(range_contains) {
     uint32_t end = 2073952257;
-    uint32_t start = end-2;
-    roaring_bitmap_t *bm = roaring_bitmap_from_range(start, end-1, 1);
-    roaring_bitmap_printf_describe(bm);printf("\n");
-    assert_true(roaring_bitmap_contains_range(bm, start, end-1));
+    uint32_t start = end - 2;
+    roaring_bitmap_t *bm = roaring_bitmap_from_range(start, end - 1, 1);
+    roaring_bitmap_printf_describe(bm);
+    printf("\n");
+    assert_true(roaring_bitmap_contains_range(bm, start, end - 1));
     assert_false(roaring_bitmap_contains_range(bm, start, end));
     roaring_bitmap_free(bm);
 }
@@ -151,46 +148,51 @@ DEFINE_TEST(contains_bulk) {
     roaring_bitmap_add_range_closed(bm, 0, 1000);
 
     // add array container from 77000
-    for (uint32_t i = 77000; i < 87000; i+=2) {
+    for (uint32_t i = 77000; i < 87000; i += 2) {
         roaring_bitmap_add(bm, i);
     }
     // add bitset container from 132000
-    for (uint32_t i = 132000; i < 140000; i+=2) {
+    for (uint32_t i = 132000; i < 140000; i += 2) {
         roaring_bitmap_add(bm, i);
     }
 
     roaring_bitmap_add(bm, UINT32_MAX);
 
     uint32_t values[] = {
-      1000,   // 1
-      1001,   // 0
-      77000,  // 1
-      77001,  // 0
-      77002,  // 1
-      1002,  // 0
-      132000, // 1
-      132001, // 0
-      132002, // 1
-      77003,  // 0
-      UINT32_MAX, // 1
-      UINT32_MAX - 1, // 0
+        1000,            // 1
+        1001,            // 0
+        77000,           // 1
+        77001,           // 0
+        77002,           // 1
+        1002,            // 0
+        132000,          // 1
+        132001,          // 0
+        132002,          // 1
+        77003,           // 0
+        UINT32_MAX,      // 1
+        UINT32_MAX - 1,  // 0
     };
     size_t test_count = sizeof(values) / sizeof(values[0]);
 
     for (size_t i = 0; i < test_count; i++) {
         roaring_bulk_context_t empty_context = {0};
         bool expected_contains = roaring_bitmap_contains(bm, values[i]);
-        assert_true(expected_contains == roaring_bitmap_contains_bulk(bm, &empty_context, values[i]));
-        assert_true(expected_contains == roaring_bitmap_contains_bulk(bm, &context, values[i]));
+        assert_true(expected_contains == roaring_bitmap_contains_bulk(
+                                             bm, &empty_context, values[i]));
+        assert_true(expected_contains ==
+                    roaring_bitmap_contains_bulk(bm, &context, values[i]));
 
         if (expected_contains) {
             assert_int_equal(context.key, values[i] >> 16);
         }
         if (context.container != NULL) {
             assert_in_range(context.idx, 0, bm->high_low_container.size - 1);
-            assert_ptr_equal(context.container, bm->high_low_container.containers[context.idx]);
-            assert_int_equal(context.key, bm->high_low_container.keys[context.idx]);
-            assert_int_equal(context.typecode, bm->high_low_container.typecodes[context.idx]);
+            assert_ptr_equal(context.container,
+                             bm->high_low_container.containers[context.idx]);
+            assert_int_equal(context.key,
+                             bm->high_low_container.keys[context.idx]);
+            assert_int_equal(context.typecode,
+                             bm->high_low_container.typecodes[context.idx]);
         }
     }
     roaring_bitmap_free(bm);
@@ -204,13 +206,13 @@ DEFINE_TEST(is_really_empty) {
 }
 
 DEFINE_TEST(inplaceorwide) {
-  uint64_t end = 4294901761;
-  roaring_bitmap_t *r1 = roaring_bitmap_from_range(0,1,1);
-  roaring_bitmap_t *r2 = roaring_bitmap_from_range(0,end,1);
-  roaring_bitmap_or_inplace(r1, r2);
-  assert_true(roaring_bitmap_get_cardinality(r1) == end);
-  roaring_bitmap_free(r1);
-  roaring_bitmap_free(r2);
+    uint64_t end = 4294901761;
+    roaring_bitmap_t *r1 = roaring_bitmap_from_range(0, 1, 1);
+    roaring_bitmap_t *r2 = roaring_bitmap_from_range(0, end, 1);
+    roaring_bitmap_or_inplace(r1, r2);
+    assert_true(roaring_bitmap_get_cardinality(r1) == end);
+    roaring_bitmap_free(r1);
+    roaring_bitmap_free(r2);
 }
 
 void can_copy_empty(bool copy_on_write) {
@@ -225,10 +227,10 @@ void can_copy_empty(bool copy_on_write) {
     roaring_bitmap_add(bm2, 5);
     assert_true(roaring_bitmap_get_cardinality(bm1) == 1);
     assert_true(roaring_bitmap_get_cardinality(bm2) == 1);
-    assert_true(roaring_bitmap_contains(bm1,3));
-    assert_true(roaring_bitmap_contains(bm2,5));
-    assert_true(!roaring_bitmap_contains(bm2,3));
-    assert_true(!roaring_bitmap_contains(bm1,5));
+    assert_true(roaring_bitmap_contains(bm1, 3));
+    assert_true(roaring_bitmap_contains(bm2, 5));
+    assert_true(!roaring_bitmap_contains(bm2, 3));
+    assert_true(!roaring_bitmap_contains(bm1, 5));
     roaring_bitmap_free(bm1);
     roaring_bitmap_free(bm2);
 }
@@ -237,7 +239,8 @@ bool check_serialization(roaring_bitmap_t *bitmap) {
     const size_t size = roaring_bitmap_portable_size_in_bytes(bitmap);
     char *data = (char *)malloc(size);
     roaring_bitmap_portable_serialize(bitmap, data);
-    roaring_bitmap_t *deserializedBitmap = roaring_bitmap_portable_deserialize(data);
+    roaring_bitmap_t *deserializedBitmap =
+        roaring_bitmap_portable_deserialize(data);
     bool ret = roaring_bitmap_equals(bitmap, deserializedBitmap);
     roaring_bitmap_free(deserializedBitmap);
     free(data);
@@ -277,7 +280,7 @@ DEFINE_TEST(issue245) {
 
 DEFINE_TEST(issue208) {
     roaring_bitmap_t *r = roaring_bitmap_create();
-    for (uint32_t i = 1; i < 8194; i+=2) {
+    for (uint32_t i = 1; i < 8194; i += 2) {
         roaring_bitmap_add(r, i);
     }
     uint32_t rank = roaring_bitmap_rank(r, 63);
@@ -290,7 +293,7 @@ DEFINE_TEST(issue208b) {
     for (uint32_t i = 65536 - 64; i < 65536; i++) {
         roaring_bitmap_add(r, i);
     }
-    for (uint32_t i = 0; i < 8196; i+=2) {
+    for (uint32_t i = 0; i < 8196; i += 2) {
         roaring_bitmap_add(r, i);
     }
     for (uint32_t i = 65536 - 64; i < 65536; i++) {
@@ -316,13 +319,9 @@ DEFINE_TEST(issue288) {
     roaring_bitmap_free(r2);
 }
 
-DEFINE_TEST(can_copy_empty_true) {
-  can_copy_empty(true);
-}
+DEFINE_TEST(can_copy_empty_true) { can_copy_empty(true); }
 
-DEFINE_TEST(can_copy_empty_false) {
-  can_copy_empty(false);
-}
+DEFINE_TEST(can_copy_empty_false) { can_copy_empty(false); }
 
 void can_add_to_copies(bool copy_on_write) {
     roaring_bitmap_t *bm1 = roaring_bitmap_create();
@@ -339,18 +338,19 @@ void can_add_to_copies(bool copy_on_write) {
     roaring_bitmap_free(bm2);
 }
 
-void convert_all_containers(roaring_bitmap_t* r, uint8_t dst_type) {
+void convert_all_containers(roaring_bitmap_t *r, uint8_t dst_type) {
     for (int32_t i = 0; i < r->high_low_container.size; i++) {
         // first step: convert src_type to ARRAY
         if (r->high_low_container.typecodes[i] == BITSET_CONTAINER_TYPE) {
-            array_container_t* dst_container = array_container_from_bitset(
-                    CAST_bitset(r->high_low_container.containers[i]));
-            bitset_container_free(CAST_bitset(r->high_low_container.containers[i]));
+            array_container_t *dst_container = array_container_from_bitset(
+                CAST_bitset(r->high_low_container.containers[i]));
+            bitset_container_free(
+                CAST_bitset(r->high_low_container.containers[i]));
             r->high_low_container.containers[i] = dst_container;
             r->high_low_container.typecodes[i] = ARRAY_CONTAINER_TYPE;
         } else if (r->high_low_container.typecodes[i] == RUN_CONTAINER_TYPE) {
-            array_container_t* dst_container = array_container_from_run(
-                    CAST_run(r->high_low_container.containers[i]));
+            array_container_t *dst_container = array_container_from_run(
+                CAST_run(r->high_low_container.containers[i]));
             run_container_free(CAST_run(r->high_low_container.containers[i]));
             r->high_low_container.containers[i] = dst_container;
             r->high_low_container.typecodes[i] = ARRAY_CONTAINER_TYPE;
@@ -359,15 +359,17 @@ void convert_all_containers(roaring_bitmap_t* r, uint8_t dst_type) {
 
         // second step: convert ARRAY to dst_type
         if (dst_type == BITSET_CONTAINER_TYPE) {
-            bitset_container_t* dst_container = bitset_container_from_array(
-                    CAST_array(r->high_low_container.containers[i]));
-            array_container_free(CAST_array(r->high_low_container.containers[i]));
+            bitset_container_t *dst_container = bitset_container_from_array(
+                CAST_array(r->high_low_container.containers[i]));
+            array_container_free(
+                CAST_array(r->high_low_container.containers[i]));
             r->high_low_container.containers[i] = dst_container;
             r->high_low_container.typecodes[i] = BITSET_CONTAINER_TYPE;
         } else if (dst_type == RUN_CONTAINER_TYPE) {
-            run_container_t* dst_container = run_container_from_array(
-                    CAST_array(r->high_low_container.containers[i]));
-            array_container_free(CAST_array(r->high_low_container.containers[i]));
+            run_container_t *dst_container = run_container_from_array(
+                CAST_array(r->high_low_container.containers[i]));
+            array_container_free(
+                CAST_array(r->high_low_container.containers[i]));
             r->high_low_container.containers[i] = dst_container;
             r->high_low_container.typecodes[i] = RUN_CONTAINER_TYPE;
         }
@@ -384,15 +386,15 @@ struct sbs_s {
 
     // reference implementation
     uint64_t *words;
-    uint32_t size; // number of words
+    uint32_t size;  // number of words
 };
 typedef struct sbs_s sbs_t;
 
 sbs_t *sbs_create(void) {
-    sbs_t *sbs = (sbs_t*)malloc(sizeof(sbs_t));
+    sbs_t *sbs = (sbs_t *)malloc(sizeof(sbs_t));
     sbs->roaring = roaring_bitmap_create();
     sbs->size = 1;
-    sbs->words = (uint64_t*)malloc(sbs->size * sizeof(uint64_t));
+    sbs->words = (uint64_t *)malloc(sbs->size * sizeof(uint64_t));
     for (uint32_t i = 0; i < sbs->size; i++) {
         sbs->words[i] = 0;
     }
@@ -400,38 +402,39 @@ sbs_t *sbs_create(void) {
 }
 
 void sbs_free(sbs_t *sbs) {
-  roaring_bitmap_free(sbs->roaring);
-  free(sbs->words);
-  free(sbs);
+    roaring_bitmap_free(sbs->roaring);
+    free(sbs->words);
+    free(sbs);
 }
 
 void sbs_convert(sbs_t *sbs, uint8_t code) {
-  convert_all_containers(sbs->roaring, code);
+    convert_all_containers(sbs->roaring, code);
 }
 
 void sbs_ensure_room(sbs_t *sbs, uint32_t v) {
-  uint32_t i = v / 64;
-  if (i >= sbs->size) {
-    uint32_t new_size = (i+1) * 3 / 2;
-    sbs->words = (uint64_t*)realloc(sbs->words, new_size*sizeof(uint64_t));
-    for (uint32_t j = sbs->size; j < new_size; j++) {
-      sbs->words[j] = 0;
+    uint32_t i = v / 64;
+    if (i >= sbs->size) {
+        uint32_t new_size = (i + 1) * 3 / 2;
+        sbs->words =
+            (uint64_t *)realloc(sbs->words, new_size * sizeof(uint64_t));
+        for (uint32_t j = sbs->size; j < new_size; j++) {
+            sbs->words[j] = 0;
+        }
+        sbs->size = new_size;
     }
-    sbs->size = new_size;
-  }
 }
 
 void sbs_add_value(sbs_t *sbs, uint32_t v) {
     roaring_bitmap_add(sbs->roaring, v);
 
     sbs_ensure_room(sbs, v);
-    sbs->words[v/64] |= UINT64_C(1) << (v % 64);
+    sbs->words[v / 64] |= UINT64_C(1) << (v % 64);
 }
 
 void sbs_add_range(sbs_t *sbs, uint64_t min, uint64_t max) {
     sbs_ensure_room(sbs, max);
     for (uint64_t v = min; v <= max; v++) {
-        sbs->words[v/64] |= UINT64_C(1) << (v % 64);
+        sbs->words[v / 64] |= UINT64_C(1) << (v % 64);
     }
 
     roaring_bitmap_add_range(sbs->roaring, min, max + 1);
@@ -440,7 +443,7 @@ void sbs_add_range(sbs_t *sbs, uint64_t min, uint64_t max) {
 void sbs_remove_range(sbs_t *sbs, uint64_t min, uint64_t max) {
     sbs_ensure_room(sbs, max);
     for (uint64_t v = min; v <= max; v++) {
-        sbs->words[v/64] &= ~(UINT64_C(1) << (v % 64));
+        sbs->words[v / 64] &= ~(UINT64_C(1) << (v % 64));
     }
 
     roaring_bitmap_remove_range(sbs->roaring, min, max + 1);
@@ -450,7 +453,7 @@ void sbs_remove_many(sbs_t *sbs, size_t n_args, uint32_t *vals) {
     for (size_t i = 0; i < n_args; i++) {
         uint32_t v = vals[i];
         sbs_ensure_room(sbs, v);
-        sbs->words[v/64] &= ~(UINT64_C(1) << (v % 64));
+        sbs->words[v / 64] &= ~(UINT64_C(1) << (v % 64));
     }
     roaring_bitmap_remove_many(sbs->roaring, n_args, vals);
 }
@@ -458,13 +461,14 @@ void sbs_remove_many(sbs_t *sbs, size_t n_args, uint32_t *vals) {
 bool sbs_check_type(sbs_t *sbs, uint8_t type) {
     bool answer = true;
     for (int32_t i = 0; i < sbs->roaring->high_low_container.size; i++) {
-        answer = answer && (sbs->roaring->high_low_container.typecodes[i] == type);
+        answer =
+            answer && (sbs->roaring->high_low_container.typecodes[i] == type);
     }
     return answer;
 }
 
 bool sbs_is_empty(sbs_t *sbs) {
-  return sbs->roaring->high_low_container.size == 0;
+    return sbs->roaring->high_low_container.size == 0;
 }
 
 void sbs_compare(sbs_t *sbs) {
@@ -477,24 +481,24 @@ void sbs_compare(sbs_t *sbs) {
         }
     }
     uint32_t *expected_values =
-            (uint32_t*)malloc(expected_cardinality * sizeof(uint32_t));
+        (uint32_t *)malloc(expected_cardinality * sizeof(uint32_t));
     memset(expected_values, 0, expected_cardinality * sizeof(uint32_t));
     for (uint32_t i = 0, dst = 0; i < sbs->size; i++) {
         for (uint32_t j = 0; j < 64; j++) {
             if ((sbs->words[i] & (UINT64_C(1) << j)) != 0) {
-                expected_values[dst++] = i*64 + j;
+                expected_values[dst++] = i * 64 + j;
             }
         }
     }
 
     uint32_t actual_cardinality = roaring_bitmap_get_cardinality(sbs->roaring);
     uint32_t *actual_values =
-            (uint32_t*)malloc(actual_cardinality * sizeof(uint32_t));
+        (uint32_t *)malloc(actual_cardinality * sizeof(uint32_t));
     memset(actual_values, 0, actual_cardinality * sizeof(uint32_t));
     roaring_bitmap_to_uint32_array(sbs->roaring, actual_values);
 
-    bool ok = array_equals(actual_values, actual_cardinality,
-                           expected_values, expected_cardinality);
+    bool ok = array_equals(actual_values, actual_cardinality, expected_values,
+                           expected_cardinality);
     if (!ok) {
         printf("Expected: ");
         for (uint32_t i = 0; i < expected_cardinality; i++) {
@@ -576,59 +580,57 @@ DEFINE_TEST(check_interval) {
 
     roaring_bitmap_printf(r);
 
-
-    roaring_bitmap_t *range = roaring_bitmap_from_range(10, 1000+1, 1);
+    roaring_bitmap_t *range = roaring_bitmap_from_range(10, 1000 + 1, 1);
     assert_non_null(range);
-    assert_true(roaring_bitmap_intersect(r,range));
+    assert_true(roaring_bitmap_intersect(r, range));
     roaring_bitmap_t *range2 = roaring_bitmap_from_range(10, 1000, 1);
     assert_non_null(range2);
-    assert_false(roaring_bitmap_intersect(r,range2));
+    assert_false(roaring_bitmap_intersect(r, range2));
 
-    assert_true(roaring_bitmap_intersect_with_range(r, 10, 1000+1));
+    assert_true(roaring_bitmap_intersect_with_range(r, 10, 1000 + 1));
     assert_false(roaring_bitmap_intersect_with_range(r, 10, 1000));
 
     roaring_bitmap_free(r);
     roaring_bitmap_free(range);
     roaring_bitmap_free(range2);
-
 }
 
 DEFINE_TEST(check_full_inplace_flip) {
-  roaring_bitmap_t *r1 = roaring_bitmap_create();
-  uint64_t bignumber = UINT64_C(0x100000000);
-  roaring_bitmap_flip_inplace(r1, 0, bignumber);
-  assert_true(roaring_bitmap_get_cardinality(r1) == bignumber);
-  roaring_bitmap_free(r1);
+    roaring_bitmap_t *r1 = roaring_bitmap_create();
+    uint64_t bignumber = UINT64_C(0x100000000);
+    roaring_bitmap_flip_inplace(r1, 0, bignumber);
+    assert_true(roaring_bitmap_get_cardinality(r1) == bignumber);
+    roaring_bitmap_free(r1);
 }
 
 DEFINE_TEST(check_iterate_to_end) {
-  uint64_t bignumber = UINT64_C(0x100000000);
-  for(uint64_t s = 0; s < 1024; s++) {
-    roaring_bitmap_t *r1 = roaring_bitmap_create();
-    roaring_bitmap_flip_inplace(r1, bignumber - s, bignumber);
-    roaring_uint32_iterator_t iterator;
-    roaring_init_iterator(r1, &iterator);
-    uint64_t count = 0;
-    while(iterator.has_value) {
-      assert_true(iterator.current_value + (s - count) == bignumber);
-      count++;
-      roaring_advance_uint32_iterator(&iterator);
-    }
-    assert_true(count == s);
-    assert_true(roaring_bitmap_get_cardinality(r1) == s);
-    roaring_bitmap_free(r1);
-  }
+    uint64_t bignumber = UINT64_C(0x100000000);
+    for (uint64_t s = 0; s < 1024; s++) {
+        roaring_bitmap_t *r1 = roaring_bitmap_create();
+        roaring_bitmap_flip_inplace(r1, bignumber - s, bignumber);
+        roaring_uint32_iterator_t iterator;
+        roaring_init_iterator(r1, &iterator);
+        uint64_t count = 0;
+        while (iterator.has_value) {
+            assert_true(iterator.current_value + (s - count) == bignumber);
+            count++;
+            roaring_advance_uint32_iterator(&iterator);
+        }
+        assert_true(count == s);
+        assert_true(roaring_bitmap_get_cardinality(r1) == s);
+        roaring_bitmap_free(r1);
+    }
 }
 
 DEFINE_TEST(check_iterate_to_beginning) {
     uint64_t bignumber = UINT64_C(0x100000000);
-    for(uint64_t s = 0; s < 1024; s++) {
+    for (uint64_t s = 0; s < 1024; s++) {
         roaring_bitmap_t *r1 = roaring_bitmap_create();
         roaring_bitmap_flip_inplace(r1, bignumber - s, bignumber);
         roaring_uint32_iterator_t iterator;
         roaring_init_iterator_last(r1, &iterator);
         uint64_t count = 0;
-        while(iterator.has_value) {
+        while (iterator.has_value) {
             count++;
             assert_true(iterator.current_value + count == bignumber);
             roaring_previous_uint32_iterator(&iterator);
@@ -640,60 +642,60 @@ DEFINE_TEST(check_iterate_to_beginning) {
 }
 
 DEFINE_TEST(check_range_contains_from_end) {
-  uint64_t bignumber = UINT64_C(0x100000000);
-  for(uint64_t s = 0; s <  1024 * 1024; s++) {
-    roaring_bitmap_t *r1 = roaring_bitmap_create();
-    roaring_bitmap_add_range(r1, bignumber - s, bignumber);
-    assert_true(roaring_bitmap_get_cardinality(r1) == s);
-    if(s>0) {
-      assert_true(roaring_bitmap_contains_range(r1, bignumber - s, bignumber - 1));
-    }
-    assert_true(roaring_bitmap_contains_range(r1, bignumber - s, bignumber));
-    assert_false(roaring_bitmap_contains_range(r1, bignumber - s - 1, bignumber));
-    assert_true(roaring_bitmap_get_cardinality(r1) == s);
-    roaring_bitmap_free(r1);
-  }
+    uint64_t bignumber = UINT64_C(0x100000000);
+    for (uint64_t s = 0; s < 1024 * 1024; s++) {
+        roaring_bitmap_t *r1 = roaring_bitmap_create();
+        roaring_bitmap_add_range(r1, bignumber - s, bignumber);
+        assert_true(roaring_bitmap_get_cardinality(r1) == s);
+        if (s > 0) {
+            assert_true(roaring_bitmap_contains_range(r1, bignumber - s,
+                                                      bignumber - 1));
+        }
+        assert_true(
+            roaring_bitmap_contains_range(r1, bignumber - s, bignumber));
+        assert_false(
+            roaring_bitmap_contains_range(r1, bignumber - s - 1, bignumber));
+        assert_true(roaring_bitmap_get_cardinality(r1) == s);
+        roaring_bitmap_free(r1);
+    }
 }
 
 DEFINE_TEST(check_full_flip) {
-  roaring_bitmap_t *rorg = roaring_bitmap_create();
-  uint64_t bignumber = UINT64_C(0x100000000);
-  roaring_bitmap_t *r1 = roaring_bitmap_flip(rorg, 0, bignumber);
-  assert_true(roaring_bitmap_get_cardinality(r1) == bignumber);
-  roaring_bitmap_free(r1);
-  roaring_bitmap_free(rorg);
+    roaring_bitmap_t *rorg = roaring_bitmap_create();
+    uint64_t bignumber = UINT64_C(0x100000000);
+    roaring_bitmap_t *r1 = roaring_bitmap_flip(rorg, 0, bignumber);
+    assert_true(roaring_bitmap_get_cardinality(r1) == bignumber);
+    roaring_bitmap_free(r1);
+    roaring_bitmap_free(rorg);
 }
 
 void test_stress_memory(bool copy_on_write) {
-	for (size_t i = 0; i < 5; i++) {
-		roaring_bitmap_t *r1 = roaring_bitmap_create();
-    roaring_bitmap_set_copy_on_write(r1, copy_on_write);
-		assert_non_null(r1);
-		for (size_t k = 0; k < 1000000; k++) {
-			uint32_t j = rand() % (100000000);
-			roaring_bitmap_add(r1, j);
-		}
-		roaring_bitmap_run_optimize(r1);
-		uint32_t compact_size = roaring_bitmap_portable_size_in_bytes(r1);
-		char * serializedbytes = (char *) malloc(compact_size);
-		size_t actualsize = roaring_bitmap_portable_serialize(r1, serializedbytes);
-		assert_int_equal(actualsize, compact_size);
-    roaring_bitmap_t *t = roaring_bitmap_portable_deserialize(serializedbytes);
-    assert_true(roaring_bitmap_equals(r1, t));
-    roaring_bitmap_free(t);
-		free(serializedbytes);
-		roaring_bitmap_free(r1);
-	}
-}
-
-DEFINE_TEST(test_stress_memory_true) {
-  test_stress_memory(true);
+    for (size_t i = 0; i < 5; i++) {
+        roaring_bitmap_t *r1 = roaring_bitmap_create();
+        roaring_bitmap_set_copy_on_write(r1, copy_on_write);
+        assert_non_null(r1);
+        for (size_t k = 0; k < 1000000; k++) {
+            uint32_t j = rand() % (100000000);
+            roaring_bitmap_add(r1, j);
+        }
+        roaring_bitmap_run_optimize(r1);
+        uint32_t compact_size = roaring_bitmap_portable_size_in_bytes(r1);
+        char *serializedbytes = (char *)malloc(compact_size);
+        size_t actualsize =
+            roaring_bitmap_portable_serialize(r1, serializedbytes);
+        assert_int_equal(actualsize, compact_size);
+        roaring_bitmap_t *t =
+            roaring_bitmap_portable_deserialize(serializedbytes);
+        assert_true(roaring_bitmap_equals(r1, t));
+        roaring_bitmap_free(t);
+        free(serializedbytes);
+        roaring_bitmap_free(r1);
+    }
 }
 
-DEFINE_TEST(test_stress_memory_false) {
-  test_stress_memory(false);
-}
+DEFINE_TEST(test_stress_memory_true) { test_stress_memory(true); }
 
+DEFINE_TEST(test_stress_memory_false) { test_stress_memory(false); }
 
 void test_example(bool copy_on_write) {
     // create a new empty bitmap
@@ -752,7 +754,6 @@ void test_example(bool copy_on_write) {
     roaring_bitmap_range_uint32_array(r1, offset, limit, arr3);
     free(arr3);
 
-
     roaring_bitmap_t *r1f = roaring_bitmap_of_ptr(card1, arr1);
     assert_bitmap_validate(r1f);
     free(arr1);
@@ -817,18 +818,22 @@ void test_example(bool copy_on_write) {
 
     // we can write a bitmap to a pointer and recover it later
     uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
-    char *serializedbytes = (char*)malloc(expectedsize);
+    char *serializedbytes = (char *)malloc(expectedsize);
     size_t actualsize = roaring_bitmap_portable_serialize(r1, serializedbytes);
     assert_int_equal(actualsize, expectedsize);
     roaring_bitmap_t *t = roaring_bitmap_portable_deserialize(serializedbytes);
     assert_bitmap_validate(t);
     assert_true(roaring_bitmap_equals(r1, t));
     roaring_bitmap_free(t);
-     // we can also check whether there is a bitmap at a memory location without reading it
-    size_t sizeofbitmap = roaring_bitmap_portable_deserialize_size(serializedbytes,expectedsize);
-    assert_true(sizeofbitmap == expectedsize);  // sizeofbitmap would be zero if no bitmap were found
+    // we can also check whether there is a bitmap at a memory location without
+    // reading it
+    size_t sizeofbitmap =
+        roaring_bitmap_portable_deserialize_size(serializedbytes, expectedsize);
+    assert_true(
+        sizeofbitmap ==
+        expectedsize);  // sizeofbitmap would be zero if no bitmap were found
     // we can also read the bitmap "safely" by specifying a byte size limit:
-    t = roaring_bitmap_portable_deserialize_safe(serializedbytes,expectedsize);
+    t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
     assert_bitmap_validate(t);
     assert_true(roaring_bitmap_equals(r1, t));  // what we recover is equal
     roaring_bitmap_free(t);
@@ -856,18 +861,17 @@ void test_example(bool copy_on_write) {
     roaring_free_uint32_iterator(i);
     assert_true(roaring_bitmap_get_cardinality(r1) == counter);
 
-
     // for greater speed, you can iterate over the data in bulk
     i = roaring_create_iterator(r1);
     uint32_t buffer[256];
     while (1) {
-      uint32_t ret = roaring_read_uint32_iterator(i, buffer, 256);
-      for (uint32_t j = 0; j < ret; j++) {
-             counter += buffer[j];
-      }
-      if (ret < 256) {
-             break;
-     }
+        uint32_t ret = roaring_read_uint32_iterator(i, buffer, 256);
+        for (uint32_t j = 0; j < ret; j++) {
+            counter += buffer[j];
+        }
+        if (ret < 256) {
+            break;
+        }
     }
     roaring_free_uint32_iterator(i);
 
@@ -893,13 +897,13 @@ void test_uint32_iterator(bool run) {
     for (uint32_t i = 800000; i < 900000; i += 7) {
         roaring_bitmap_add(r1, i);
     }
-    if(run) roaring_bitmap_run_optimize(r1);
+    if (run) roaring_bitmap_run_optimize(r1);
     assert_bitmap_validate(r1);
     roaring_uint32_iterator_t *iter = roaring_create_iterator(r1);
     for (uint32_t i = 0; i < 66000; i += 3) {
         assert_true(iter->has_value);
         assert_true(iter->current_value == i);
-        roaring_move_uint32_iterator_equalorlarger(iter,i);
+        roaring_move_uint32_iterator_equalorlarger(iter, i);
         assert_true(iter->has_value);
         assert_true(iter->current_value == i);
         roaring_advance_uint32_iterator(iter);
@@ -907,7 +911,7 @@ void test_uint32_iterator(bool run) {
     for (uint32_t i = 100000; i < 200000; i++) {
         assert_true(iter->has_value);
         assert_true(iter->current_value == i);
-        roaring_move_uint32_iterator_equalorlarger(iter,i);
+        roaring_move_uint32_iterator_equalorlarger(iter, i);
         assert_true(iter->has_value);
         assert_true(iter->current_value == i);
         roaring_advance_uint32_iterator(iter);
@@ -915,7 +919,7 @@ void test_uint32_iterator(bool run) {
     for (uint32_t i = 300000; i < 500000; i += 100) {
         assert_true(iter->has_value);
         assert_true(iter->current_value == i);
-        roaring_move_uint32_iterator_equalorlarger(iter,i);
+        roaring_move_uint32_iterator_equalorlarger(iter, i);
         assert_true(iter->has_value);
         assert_true(iter->current_value == i);
         roaring_advance_uint32_iterator(iter);
@@ -923,7 +927,7 @@ void test_uint32_iterator(bool run) {
     for (uint32_t i = 600000; i < 700000; i += 1) {
         assert_true(iter->has_value);
         assert_true(iter->current_value == i);
-        roaring_move_uint32_iterator_equalorlarger(iter,i);
+        roaring_move_uint32_iterator_equalorlarger(iter, i);
         assert_true(iter->has_value);
         assert_true(iter->current_value == i);
         roaring_advance_uint32_iterator(iter);
@@ -931,66 +935,66 @@ void test_uint32_iterator(bool run) {
     for (uint32_t i = 800000; i < 900000; i += 7) {
         assert_true(iter->has_value);
         assert_true(iter->current_value == i);
-        roaring_move_uint32_iterator_equalorlarger(iter,i);
+        roaring_move_uint32_iterator_equalorlarger(iter, i);
         assert_true(iter->has_value);
         assert_true(iter->current_value == i);
         roaring_advance_uint32_iterator(iter);
     }
     assert_false(iter->has_value);
-    roaring_move_uint32_iterator_equalorlarger(iter,0);
+    roaring_move_uint32_iterator_equalorlarger(iter, 0);
     assert_true(iter->has_value);
     assert_true(iter->current_value == 0);
-    roaring_move_uint32_iterator_equalorlarger(iter,66000);
+    roaring_move_uint32_iterator_equalorlarger(iter, 66000);
     assert_true(iter->has_value);
     assert_true(iter->current_value == 100000);
-    roaring_move_uint32_iterator_equalorlarger(iter,100000);
+    roaring_move_uint32_iterator_equalorlarger(iter, 100000);
     assert_true(iter->has_value);
     assert_true(iter->current_value == 100000);
-    roaring_move_uint32_iterator_equalorlarger(iter,200000);
+    roaring_move_uint32_iterator_equalorlarger(iter, 200000);
     assert_true(iter->has_value);
     assert_true(iter->current_value == 300000);
-    roaring_move_uint32_iterator_equalorlarger(iter,300000);
+    roaring_move_uint32_iterator_equalorlarger(iter, 300000);
     assert_true(iter->has_value);
     assert_true(iter->current_value == 300000);
-    roaring_move_uint32_iterator_equalorlarger(iter,500000);
+    roaring_move_uint32_iterator_equalorlarger(iter, 500000);
     assert_true(iter->has_value);
     assert_true(iter->current_value == 600000);
-    roaring_move_uint32_iterator_equalorlarger(iter,600000);
+    roaring_move_uint32_iterator_equalorlarger(iter, 600000);
     assert_true(iter->has_value);
     assert_true(iter->current_value == 600000);
-    roaring_move_uint32_iterator_equalorlarger(iter,700000);
+    roaring_move_uint32_iterator_equalorlarger(iter, 700000);
     assert_true(iter->has_value);
     assert_true(iter->current_value == 800000);
-    roaring_move_uint32_iterator_equalorlarger(iter,800000);
+    roaring_move_uint32_iterator_equalorlarger(iter, 800000);
     assert_true(iter->has_value);
     assert_true(iter->current_value == 800000);
-    roaring_move_uint32_iterator_equalorlarger(iter,900000);
+    roaring_move_uint32_iterator_equalorlarger(iter, 900000);
     assert_false(iter->has_value);
-    roaring_move_uint32_iterator_equalorlarger(iter,0);
+    roaring_move_uint32_iterator_equalorlarger(iter, 0);
     for (uint32_t i = 0; i < 66000; i += 3) {
         assert_true(iter->has_value);
         assert_true(iter->current_value == i);
-        roaring_move_uint32_iterator_equalorlarger(iter,i+1);
+        roaring_move_uint32_iterator_equalorlarger(iter, i + 1);
     }
     for (uint32_t i = 100000; i < 200000; i++) {
         assert_true(iter->has_value);
         assert_true(iter->current_value == i);
-        roaring_move_uint32_iterator_equalorlarger(iter,i+1);
+        roaring_move_uint32_iterator_equalorlarger(iter, i + 1);
     }
     for (uint32_t i = 300000; i < 500000; i += 100) {
         assert_true(iter->has_value);
         assert_true(iter->current_value == i);
-        roaring_move_uint32_iterator_equalorlarger(iter,i+1);
+        roaring_move_uint32_iterator_equalorlarger(iter, i + 1);
     }
     for (uint32_t i = 600000; i < 700000; i += 1) {
         assert_true(iter->has_value);
         assert_true(iter->current_value == i);
-        roaring_move_uint32_iterator_equalorlarger(iter,i+1);
+        roaring_move_uint32_iterator_equalorlarger(iter, i + 1);
     }
     for (uint32_t i = 800000; i < 900000; i += 7) {
         assert_true(iter->has_value);
         assert_true(iter->current_value == i);
-        roaring_move_uint32_iterator_equalorlarger(iter,i+1);
+        roaring_move_uint32_iterator_equalorlarger(iter, i + 1);
     }
     assert_false(iter->has_value);
 
@@ -1086,7 +1090,7 @@ DEFINE_TEST(test_clear) {
     size_t expected_card = 0;
     for (uint32_t value = 33057; value < 147849; value += 8) {
         roaring_bitmap_add(bm, value);
-        expected_card ++;
+        expected_card++;
     }
     assert_true(roaring_bitmap_get_cardinality(bm) == expected_card);
     roaring_bitmap_clear(bm);
@@ -1094,7 +1098,6 @@ DEFINE_TEST(test_clear) {
     roaring_bitmap_free(bm);
 }
 
-
 DEFINE_TEST(test_remove_from_copies_true) { can_remove_from_copies(true); }
 
 DEFINE_TEST(test_remove_from_copies_false) { can_remove_from_copies(false); }
@@ -1132,7 +1135,8 @@ DEFINE_TEST(test_silly_range) {
 }
 
 DEFINE_TEST(test_adversarial_range) {
-    roaring_bitmap_t *bm1 = roaring_bitmap_from_range(0, UINT64_C(0x100000000), 1);
+    roaring_bitmap_t *bm1 =
+        roaring_bitmap_from_range(0, UINT64_C(0x100000000), 1);
     assert_bitmap_validate(bm1);
     assert_true(roaring_bitmap_get_cardinality(bm1) == UINT64_C(0x100000000));
     roaring_bitmap_free(bm1);
@@ -1141,7 +1145,7 @@ DEFINE_TEST(test_adversarial_range) {
 DEFINE_TEST(test_range_and_serialize) {
     roaring_bitmap_t *old_bm = roaring_bitmap_from_range(65520, 131057, 16);
     size_t size = roaring_bitmap_portable_size_in_bytes(old_bm);
-    char *buff = (char*)malloc(size);
+    char *buff = (char *)malloc(size);
     size_t actualsize = roaring_bitmap_portable_serialize(old_bm, buff);
     assert_int_equal(actualsize, size);
     roaring_bitmap_t *new_bm = roaring_bitmap_portable_deserialize(buff);
@@ -1308,7 +1312,7 @@ DEFINE_TEST(test_portable_serialize) {
         roaring_bitmap_add(r1, 3 * i);
 
     uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
-    char *serialized = (char*)malloc(expectedsize);
+    char *serialized = (char *)malloc(expectedsize);
     serialize_len = roaring_bitmap_portable_serialize(r1, serialized);
     assert_int_equal(serialize_len, expectedsize);
     assert_int_equal(serialize_len, expectedsize);
@@ -1334,7 +1338,7 @@ DEFINE_TEST(test_portable_serialize) {
     r1 = roaring_bitmap_of(6, 2946000, 2997491, 10478289, 10490227, 10502444,
                            19866827);
     expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
-    serialized = (char*)malloc(expectedsize);
+    serialized = (char *)malloc(expectedsize);
     serialize_len = roaring_bitmap_portable_serialize(r1, serialized);
     assert_int_equal(serialize_len, expectedsize);
     assert_int_equal(serialize_len, expectedsize);
@@ -1367,7 +1371,7 @@ DEFINE_TEST(test_portable_serialize) {
 
     roaring_bitmap_run_optimize(r1);
     expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
-    serialized = (char*)malloc(expectedsize);
+    serialized = (char *)malloc(expectedsize);
     serialize_len = roaring_bitmap_portable_serialize(r1, serialized);
     assert_int_equal(serialize_len, expectedsize);
 
@@ -1403,7 +1407,7 @@ DEFINE_TEST(test_serialize) {
     /* Add some values to the bitmap */
     for (int i = 0, top_val = 384000; i < top_val; i++)
         roaring_bitmap_add(r1, 3 * i);
-    serialized = (char*)malloc(roaring_bitmap_size_in_bytes(r1));
+    serialized = (char *)malloc(roaring_bitmap_size_in_bytes(r1));
     serialize_len = roaring_bitmap_serialize(r1, serialized);
     assert_int_equal(serialize_len, roaring_bitmap_size_in_bytes(r1));
     r2 = roaring_bitmap_deserialize(serialized);
@@ -1434,7 +1438,7 @@ DEFINE_TEST(test_serialize) {
     ra_append(&r1->high_low_container, 0, run, RUN_CONTAINER_TYPE);
 
     serialize_len = roaring_bitmap_size_in_bytes(r1);
-    serialized = (char*)malloc(serialize_len);
+    serialized = (char *)malloc(serialize_len);
     assert_int_equal((int32_t)serialize_len,
                      roaring_bitmap_serialize(r1, serialized));
     r2 = roaring_bitmap_deserialize(serialized);
@@ -1447,7 +1451,8 @@ DEFINE_TEST(test_serialize) {
 
     // Check that roaring_bitmap_deserialize_safe succeed with valid length
 
-    roaring_bitmap_t *t_safe = roaring_bitmap_deserialize_safe(serialized, serialize_len);
+    roaring_bitmap_t *t_safe =
+        roaring_bitmap_deserialize_safe(serialized, serialize_len);
     assert_true(roaring_bitmap_equals(r1, t_safe));
     roaring_bitmap_free(t_safe);
 
@@ -1464,7 +1469,7 @@ DEFINE_TEST(test_serialize) {
     r1 = roaring_bitmap_of(6, 2946000, 2997491, 10478289, 10490227, 10502444,
                            19866827);
 
-    serialized = (char*)malloc(roaring_bitmap_size_in_bytes(r1));
+    serialized = (char *)malloc(roaring_bitmap_size_in_bytes(r1));
     serialize_len = roaring_bitmap_serialize(r1, serialized);
     assert_int_equal(serialize_len, roaring_bitmap_size_in_bytes(r1));
     r2 = roaring_bitmap_deserialize(serialized);
@@ -1493,7 +1498,7 @@ DEFINE_TEST(test_serialize) {
         roaring_bitmap_add(r1, k);
     }
     roaring_bitmap_run_optimize(r1);
-    serialized = (char*)malloc(roaring_bitmap_size_in_bytes(r1));
+    serialized = (char *)malloc(roaring_bitmap_size_in_bytes(r1));
     serialize_len = roaring_bitmap_serialize(r1, serialized);
     assert_int_equal(serialize_len, roaring_bitmap_size_in_bytes(r1));
     r2 = roaring_bitmap_deserialize(serialized);
@@ -1520,7 +1525,7 @@ DEFINE_TEST(test_serialize) {
     /* ******* */
     roaring_bitmap_t *old_bm = roaring_bitmap_create();
     for (unsigned i = 0; i < 102; i++) roaring_bitmap_add(old_bm, i);
-    char *buff = (char*)malloc(roaring_bitmap_size_in_bytes(old_bm));
+    char *buff = (char *)malloc(roaring_bitmap_size_in_bytes(old_bm));
     uint32_t size = roaring_bitmap_serialize(old_bm, buff);
     assert_int_equal(size, roaring_bitmap_size_in_bytes(old_bm));
     roaring_bitmap_t *new_bm = roaring_bitmap_deserialize(buff);
@@ -1601,42 +1606,47 @@ DEFINE_TEST(test_contains) {
 }
 
 DEFINE_TEST(test_contains_range) {
-    uint32_t* values = (uint32_t*)malloc(100000 * sizeof(uint32_t));
+    uint32_t *values = (uint32_t *)malloc(100000 * sizeof(uint32_t));
     assert_non_null(values);
     for (uint32_t length_range = 1; length_range <= 64; ++length_range) {
-      roaring_bitmap_t *r1 = roaring_bitmap_create();
-      assert_non_null(r1);
-      for (uint32_t i = 0; i < 100000; ++i){
+        roaring_bitmap_t *r1 = roaring_bitmap_create();
+        assert_non_null(r1);
+        for (uint32_t i = 0; i < 100000; ++i) {
             const uint32_t val = rand() % 200000;
             roaring_bitmap_add(r1, val);
             values[i] = val;
-      }
-      for (uint64_t i = 0; i < 100000; ++i){
-            if (roaring_bitmap_contains_range(r1, values[i], values[i] + length_range)) {
-                for (uint32_t j = values[i]; j < values[i] + length_range; ++j) assert_true(roaring_bitmap_contains(r1, j));
-            }
-            else {
+        }
+        for (uint64_t i = 0; i < 100000; ++i) {
+            if (roaring_bitmap_contains_range(r1, values[i],
+                                              values[i] + length_range)) {
+                for (uint32_t j = values[i]; j < values[i] + length_range; ++j)
+                    assert_true(roaring_bitmap_contains(r1, j));
+            } else {
                 uint32_t count = 0;
-                for (uint32_t j = values[i]; j < values[i] + length_range; ++j){
-                    if (roaring_bitmap_contains(r1, j)) ++count;
-                    else break;
+                for (uint32_t j = values[i]; j < values[i] + length_range;
+                     ++j) {
+                    if (roaring_bitmap_contains(r1, j))
+                        ++count;
+                    else
+                        break;
                 }
                 assert_true(count != length_range);
             }
         }
-      roaring_bitmap_free(r1);
+        roaring_bitmap_free(r1);
     }
     free(values);
     for (uint32_t length_range = 1; length_range <= 64; ++length_range) {
         roaring_bitmap_t *r1 = roaring_bitmap_create();
         assert_non_null(r1);
         const uint32_t length_range_twice = length_range * 2;
-        for (uint32_t i = 0; i < 130000; i += length_range){
-            if (i % length_range_twice == 0){
-                for (uint32_t j = i; j < i + length_range; ++j) roaring_bitmap_add(r1, j);
+        for (uint32_t i = 0; i < 130000; i += length_range) {
+            if (i % length_range_twice == 0) {
+                for (uint32_t j = i; j < i + length_range; ++j)
+                    roaring_bitmap_add(r1, j);
             }
         }
-        for (uint32_t i = 0; i < 130000; i += length_range){
+        for (uint32_t i = 0; i < 130000; i += length_range) {
             bool pres = roaring_bitmap_contains_range(r1, i, i + length_range);
             assert_true(((i % length_range_twice == 0) ? pres : !pres));
         }
@@ -1645,10 +1655,10 @@ DEFINE_TEST(test_contains_range) {
 }
 
 DEFINE_TEST(test_contains_range_PyRoaringBitMap_issue81) {
-    roaring_bitmap_t* r = roaring_bitmap_create();
+    roaring_bitmap_t *r = roaring_bitmap_create();
     roaring_bitmap_add_range(r, 1, 1900544);
-    assert_true(roaring_bitmap_contains_range(r,1,1900544));
-    assert_false(roaring_bitmap_contains_range(r,1900543,1900545));
+    assert_true(roaring_bitmap_contains_range(r, 1, 1900544));
+    assert_false(roaring_bitmap_contains_range(r, 1900543, 1900545));
     roaring_bitmap_free(r);
 }
 
@@ -2604,7 +2614,7 @@ static roaring_bitmap_t *make_roaring_from_array(uint32_t *a, int len) {
 
 DEFINE_TEST(test_conversion_to_int_array) {
     int ans_ctr = 0;
-    uint32_t *ans = (uint32_t*)calloc(100000, sizeof(int32_t));
+    uint32_t *ans = (uint32_t *)calloc(100000, sizeof(int32_t));
 
     // a dense bitmap container  (best done with runs)
     for (uint32_t i = 0; i < 50000; ++i) {
@@ -2641,7 +2651,7 @@ DEFINE_TEST(test_conversion_to_int_array) {
 DEFINE_TEST(test_conversion_to_int_array_with_runoptimize) {
     roaring_bitmap_t *r1 = roaring_bitmap_create();
     int ans_ctr = 0;
-    uint32_t *ans = (uint32_t*)calloc(100000, sizeof(int32_t));
+    uint32_t *ans = (uint32_t *)calloc(100000, sizeof(int32_t));
 
     // a dense bitmap container  (best done with runs)
     for (uint32_t i = 0; i < 50000; ++i) {
@@ -2679,7 +2689,7 @@ DEFINE_TEST(test_conversion_to_int_array_with_runoptimize) {
 
 DEFINE_TEST(test_array_to_run) {
     int ans_ctr = 0;
-    uint32_t *ans = (uint32_t*)calloc(100000, sizeof(int32_t));
+    uint32_t *ans = (uint32_t *)calloc(100000, sizeof(int32_t));
 
     // array container  (best done with runs)
     for (uint32_t i = 0; i < 500; ++i) {
@@ -2704,7 +2714,7 @@ DEFINE_TEST(test_array_to_run) {
 DEFINE_TEST(test_array_to_self) {
     int ans_ctr = 0;
 
-    uint32_t *ans = (uint32_t*)calloc(100000, sizeof(int32_t));
+    uint32_t *ans = (uint32_t *)calloc(100000, sizeof(int32_t));
 
     // array container  (best not done with runs)
     for (uint32_t i = 0; i < 500; i += 2) {
@@ -2728,7 +2738,7 @@ DEFINE_TEST(test_array_to_self) {
 
 DEFINE_TEST(test_bitset_to_self) {
     int ans_ctr = 0;
-    uint32_t *ans = (uint32_t*)calloc(100000, sizeof(int32_t));
+    uint32_t *ans = (uint32_t *)calloc(100000, sizeof(int32_t));
 
     // bitset container  (best not done with runs)
     for (uint32_t i = 0; i < 50000; i += 2) {
@@ -2752,7 +2762,7 @@ DEFINE_TEST(test_bitset_to_self) {
 
 DEFINE_TEST(test_bitset_to_run) {
     int ans_ctr = 0;
-    uint32_t *ans = (uint32_t*)calloc(100000, sizeof(int32_t));
+    uint32_t *ans = (uint32_t *)calloc(100000, sizeof(int32_t));
 
     // bitset container  (best done with runs)
     for (uint32_t i = 0; i < 50000; i++) {
@@ -2778,7 +2788,7 @@ DEFINE_TEST(test_bitset_to_run) {
 
 DEFINE_TEST(test_run_to_self) {
     int ans_ctr = 0;
-    uint32_t *ans = (uint32_t*)calloc(100000, sizeof(int32_t));
+    uint32_t *ans = (uint32_t *)calloc(100000, sizeof(int32_t));
 
     // bitset container  (best done with runs)
     for (uint32_t i = 0; i < 50000; i++) {
@@ -2804,7 +2814,7 @@ DEFINE_TEST(test_run_to_self) {
 
 DEFINE_TEST(test_remove_run_to_bitset) {
     int ans_ctr = 0;
-    uint32_t *ans = (uint32_t*)calloc(100000, sizeof(int32_t));
+    uint32_t *ans = (uint32_t *)calloc(100000, sizeof(int32_t));
 
     // bitset container  (best done with runs)
     for (uint32_t i = 0; i < 50000; i++) {
@@ -2831,7 +2841,7 @@ DEFINE_TEST(test_remove_run_to_bitset) {
 
 DEFINE_TEST(test_remove_run_to_array) {
     int ans_ctr = 0;
-    uint32_t *ans = (uint32_t*)calloc(100000, sizeof(int32_t));
+    uint32_t *ans = (uint32_t *)calloc(100000, sizeof(int32_t));
 
     // array  (best done with runs)
     for (uint32_t i = 0; i < 500; i++) {
@@ -2856,10 +2866,9 @@ DEFINE_TEST(test_remove_run_to_array) {
     free(ans);
 }
 
-
 DEFINE_TEST(test_remove_run_to_bitset_cow) {
     int ans_ctr = 0;
-    uint32_t *ans = (uint32_t*)calloc(100000, sizeof(int32_t));
+    uint32_t *ans = (uint32_t *)calloc(100000, sizeof(int32_t));
 
     // bitset container  (best done with runs)
     for (uint32_t i = 0; i < 50000; i++) {
@@ -2890,7 +2899,7 @@ DEFINE_TEST(test_remove_run_to_bitset_cow) {
 
 DEFINE_TEST(test_remove_run_to_array_cow) {
     int ans_ctr = 0;
-    uint32_t *ans = (uint32_t*)calloc(100000, sizeof(int32_t));
+    uint32_t *ans = (uint32_t *)calloc(100000, sizeof(int32_t));
 
     // array  (best done with runs)
     for (uint32_t i = 0; i < 500; i++) {
@@ -3063,7 +3072,7 @@ void test_negation_helper(bool runopt, uint32_t gap) {
         assert_true(hasrun);
     }
 
-    int orig_card = (int) roaring_bitmap_get_cardinality(r1);
+    int orig_card = (int)roaring_bitmap_get_cardinality(r1);
 
     // get the first batch of ones but not the second
     roaring_bitmap_t *notted_r1 = roaring_bitmap_flip(r1, 0U, 100000U);
@@ -3267,7 +3276,7 @@ void test_inplace_negation_helper(bool runopt, uint32_t gap) {
         assert_true(hasrun);
     }
 
-    int orig_card = (int) roaring_bitmap_get_cardinality(r1);
+    int orig_card = (int)roaring_bitmap_get_cardinality(r1);
     roaring_bitmap_t *r1_orig = roaring_bitmap_copy(r1);
 
     // get the first batch of ones but not the second
@@ -3334,8 +3343,8 @@ DEFINE_TEST(test_rand_flips) {
     const int min_runs = 1;
     const int flip_trials = 5;  // these are expensive tests
     const int range = 2000000;
-    char *input = (char*)malloc(range);
-    char *output = (char*)malloc(range);
+    char *input = (char *)malloc(range);
+    char *output = (char *)malloc(range);
 
     for (int card = 2; card < 1000000; card *= 8) {
         printf("test_rand_flips with attempted card %d", card);
@@ -3392,8 +3401,8 @@ DEFINE_TEST(test_inplace_rand_flips) {
     const int min_runs = 1;
     const int flip_trials = 5;  // these are expensive tests
     const int range = 2000000;
-    char *input = (char*)malloc(range);
-    char *output = (char*)malloc(range);
+    char *input = (char *)malloc(range);
+    char *output = (char *)malloc(range);
 
     for (int card = 2; card < 1000000; card *= 8) {
         roaring_bitmap_t *r = roaring_bitmap_create();
@@ -3479,18 +3488,18 @@ DEFINE_TEST(select_test) {
     srand(1234);
     const int min_runs = 1;
     const uint32_t range = 2000000;
-    char *input = (char*)malloc(range);
+    char *input = (char *)malloc(range);
 
     for (int card = 2; card < 1000000; card *= 8) {
-
         roaring_bitmap_t *r = roaring_bitmap_create();
         memset(input, 0, range);
         for (int i = 0; i < card; ++i) {
             double f1 = our_rand() / (double)OUR_RAND_MAX;
             double f2 = our_rand() / (double)OUR_RAND_MAX;
             double f3 = our_rand() / (double)OUR_RAND_MAX;
-            uint32_t pos = (uint32_t)(f1 * f2 * f3 *
-                            range);  // denser at the start, sparser at end
+            uint32_t pos =
+                (uint32_t)(f1 * f2 * f3 *
+                           range);  // denser at the start, sparser at end
             assert_true(pos < range);
             roaring_bitmap_add(r, pos);
             input[pos] = 1;
@@ -3589,7 +3598,7 @@ DEFINE_TEST(test_rank) {
             roaring_bitmap_add(r, x);
         }
         uint64_t card = roaring_bitmap_get_cardinality(r);
-        uint32_t *ans = (uint32_t*)malloc(card * sizeof(uint32_t));
+        uint32_t *ans = (uint32_t *)malloc(card * sizeof(uint32_t));
         roaring_bitmap_to_uint32_array(r, ans);
         for (uint32_t z = 0; z < 1000 + mymin + 10; z += 10) {
             uint64_t truerank = rank(ans, card, z);
@@ -3605,7 +3614,7 @@ DEFINE_TEST(test_rank) {
             roaring_bitmap_add(r, x);
         }
         card = roaring_bitmap_get_cardinality(r);
-        ans = (uint32_t*)malloc(card * sizeof(uint32_t));
+        ans = (uint32_t *)malloc(card * sizeof(uint32_t));
         roaring_bitmap_to_uint32_array(r, ans);
         for (uint32_t z = 0; z < 64000 + mymin + 10; z += 10) {
             uint64_t truerank = rank(ans, card, z);
@@ -3622,7 +3631,7 @@ DEFINE_TEST(test_rank) {
         }
         roaring_bitmap_run_optimize(r);
         card = roaring_bitmap_get_cardinality(r);
-        ans = (uint32_t*)malloc(card * sizeof(uint32_t));
+        ans = (uint32_t *)malloc(card * sizeof(uint32_t));
         roaring_bitmap_to_uint32_array(r, ans);
         for (uint32_t z = 0; z < 64000 + mymin + 10; z += 10) {
             uint64_t truerank = rank(ans, card, z);
@@ -3711,7 +3720,6 @@ DEFINE_TEST(test_intersect_small_run_bitset) {
     roaring_bitmap_free(rb2);
 }
 
-
 DEFINE_TEST(issue316) {
     roaring_bitmap_t *rb1 = roaring_bitmap_create();
     roaring_bitmap_set_copy_on_write(rb1, true);
@@ -3775,9 +3783,9 @@ DEFINE_TEST(test_subset) {
 }
 
 DEFINE_TEST(test_or_many_memory_leak) {
-    for(int i=0; i<10; i++) {
+    for (int i = 0; i < 10; i++) {
         roaring_bitmap_t *bm1 = roaring_bitmap_create();
-        for(int j=0; j<10; j++) {
+        for (int j = 0; j < 10; j++) {
             roaring_bitmap_t *bm2 = roaring_bitmap_create();
             const roaring_bitmap_t *buff[] = {bm1, bm2};
             roaring_bitmap_t *bm3 = roaring_bitmap_or_many(2, buff);
@@ -3789,74 +3797,74 @@ DEFINE_TEST(test_or_many_memory_leak) {
 }
 
 void test_iterator_generate_data(uint32_t **values_out, uint32_t *count_out) {
-    const size_t capacity = 1000*1000;
-    uint32_t* values =
-             (uint32_t*)malloc(sizeof(uint32_t) * capacity);  // ascending order
+    const size_t capacity = 1000 * 1000;
+    uint32_t *values =
+        (uint32_t *)malloc(sizeof(uint32_t) * capacity);  // ascending order
     uint32_t count = 0;
-    uint32_t base = 1234; // container index
+    uint32_t base = 1234;  // container index
 
     // min allowed value
     values[count++] = 0;
 
     // only the very first value in container is set
-    values[count++] = base*65536;
+    values[count++] = base * 65536;
     base += 2;
 
     // only the very last value in container is set
-    values[count++] = base*65536 + 65535;
+    values[count++] = base * 65536 + 65535;
     base += 2;
 
     // fully filled container
     for (uint32_t i = 0; i < 65536; i++) {
-        values[count++] = base*65536 + i;
+        values[count++] = base * 65536 + i;
     }
     base += 2;
 
     // even values
     for (uint32_t i = 0; i < 65536; i += 2) {
-        values[count++] = base*65536 + i;
+        values[count++] = base * 65536 + i;
     }
     base += 2;
 
     // odd values
     for (uint32_t i = 1; i < 65536; i += 2) {
-        values[count++] = base*65536 + i;
+        values[count++] = base * 65536 + i;
     }
     base += 2;
 
     // each next 64-bit word is ROR'd by one
     for (uint32_t i = 0; i < 65536; i += 65) {
-        values[count++] = base*65536 + i;
+        values[count++] = base * 65536 + i;
     }
     base += 2;
 
     // runs of increasing length: 0, 1,0, 1,1,0, 1,1,1,0, ...
     for (uint32_t i = 0, run_index = 0; i < 65536; i++) {
-      if (i != (run_index+1)*(run_index+2)/2-1) {
-        values[count++] = base*65536 + i;
-      } else {
-        run_index++;
-      }
+        if (i != (run_index + 1) * (run_index + 2) / 2 - 1) {
+            values[count++] = base * 65536 + i;
+        } else {
+            run_index++;
+        }
     }
     base += 2;
 
     // 00000XX, XXXXXX, XX0000
-    for (uint32_t i = 65536-100; i < 65536; i++) {
-        values[count++] = base*65536 + i;
+    for (uint32_t i = 65536 - 100; i < 65536; i++) {
+        values[count++] = base * 65536 + i;
     }
     base += 1;
     for (uint32_t i = 0; i < 65536; i++) {
-        values[count++] = base*65536 + i;
+        values[count++] = base * 65536 + i;
     }
     base += 1;
     for (uint32_t i = 0; i < 100; i++) {
-        values[count++] = base*65536 + i;
+        values[count++] = base * 65536 + i;
     }
     base += 2;
 
     // random
-    for (int i = 0; i < 65536; i += our_rand()%10+1) {
-        values[count++] = base*65536 + i;
+    for (int i = 0; i < 65536; i += our_rand() % 10 + 1) {
+        values[count++] = base * 65536 + i;
     }
     base += 2;
 
@@ -3870,12 +3878,14 @@ void test_iterator_generate_data(uint32_t **values_out, uint32_t *count_out) {
 
 /*
  * Read bitmap in steps of given size, compare with reference values.
- * If step is UINT32_MAX (special value), then read single non-empty container at a time.
+ * If step is UINT32_MAX (special value), then read single non-empty container
+ * at a time.
  */
-void read_compare(roaring_bitmap_t* r, const uint32_t* ref_values, uint32_t ref_count, uint32_t step) {
+void read_compare(roaring_bitmap_t *r, const uint32_t *ref_values,
+                  uint32_t ref_count, uint32_t step) {
     roaring_uint32_iterator_t *iter = roaring_create_iterator(r);
-    uint32_t* buffer = (uint32_t*)malloc(
-            sizeof(uint32_t) * (step == UINT32_MAX ? 65536 : step));
+    uint32_t *buffer = (uint32_t *)malloc(sizeof(uint32_t) *
+                                          (step == UINT32_MAX ? 65536 : step));
     while (ref_count > 0) {
         assert_true(iter->has_value == true);
         assert_true(iter->current_value == ref_values[0]);
@@ -3884,7 +3894,7 @@ void read_compare(roaring_bitmap_t* r, const uint32_t* ref_values, uint32_t ref_
         if (step == UINT32_MAX) {
             num_ask = 0;
             for (uint32_t i = 0; i < ref_count; i++) {
-                if ((ref_values[i]>>16) == (ref_values[0]>>16)) {
+                if ((ref_values[i] >> 16) == (ref_values[0] >> 16)) {
                     num_ask++;
                 } else {
                     break;
@@ -3913,7 +3923,7 @@ void read_compare(roaring_bitmap_t* r, const uint32_t* ref_values, uint32_t ref_
 }
 
 void test_read_uint32_iterator(uint8_t type) {
-    uint32_t* ref_values;
+    uint32_t *ref_values;
     uint32_t ref_count;
     test_iterator_generate_data(&ref_values, &ref_count);
 
@@ -3928,9 +3938,9 @@ void test_read_uint32_iterator(uint8_t type) {
     read_compare(r, ref_values, ref_count, 1);
     read_compare(r, ref_values, ref_count, 2);
     read_compare(r, ref_values, ref_count, 7);
-    read_compare(r, ref_values, ref_count, ref_count-1);
+    read_compare(r, ref_values, ref_count, ref_count - 1);
     read_compare(r, ref_values, ref_count, ref_count);
-    read_compare(r, ref_values, ref_count, UINT32_MAX); // special value
+    read_compare(r, ref_values, ref_count, UINT32_MAX);  // special value
 
     roaring_bitmap_free(r);
     free(ref_values);
@@ -3946,11 +3956,11 @@ DEFINE_TEST(test_read_uint32_iterator_run) {
     test_read_uint32_iterator(RUN_CONTAINER_TYPE);
 }
 DEFINE_TEST(test_read_uint32_iterator_native) {
-    test_read_uint32_iterator(UINT8_MAX); // special value
+    test_read_uint32_iterator(UINT8_MAX);  // special value
 }
 
 void test_previous_iterator(uint8_t type) {
-    uint32_t* ref_values;
+    uint32_t *ref_values;
     uint32_t ref_count;
     test_iterator_generate_data(&ref_values, &ref_count);
 
@@ -3969,7 +3979,7 @@ void test_previous_iterator(uint8_t type) {
     do {
         assert_true(iterator.has_value);
         ++count;
-        assert_true((int64_t)ref_count - (int64_t)count >= 0); // sanity check
+        assert_true((int64_t)ref_count - (int64_t)count >= 0);  // sanity check
         assert_true(ref_values[ref_count - count] == iterator.current_value);
     } while (roaring_previous_uint32_iterator(&iterator));
 
@@ -3992,17 +4002,17 @@ DEFINE_TEST(test_previous_iterator_run) {
 }
 
 DEFINE_TEST(test_previous_iterator_native) {
-    test_previous_iterator(UINT8_MAX); // special value
+    test_previous_iterator(UINT8_MAX);  // special value
 }
 
-void test_iterator_reuse_retry_count(int retry_count){
-    uint32_t* ref_values;
+void test_iterator_reuse_retry_count(int retry_count) {
+    uint32_t *ref_values;
     uint32_t ref_count;
     test_iterator_generate_data(&ref_values, &ref_count);
 
-    roaring_bitmap_t* with_edges = roaring_bitmap_create();
+    roaring_bitmap_t *with_edges = roaring_bitmap_create();
     // We don't want min and max values inside this bitmap
-    roaring_bitmap_t* without_edges = roaring_bitmap_create();
+    roaring_bitmap_t *without_edges = roaring_bitmap_create();
 
     for (uint32_t i = 0; i < ref_count; i++) {
         roaring_bitmap_add(with_edges, ref_values[i]);
@@ -4016,12 +4026,13 @@ void test_iterator_reuse_retry_count(int retry_count){
     assert_true(roaring_bitmap_contains(with_edges, UINT32_MAX));
     assert_true(!roaring_bitmap_contains(without_edges, 0));
     assert_true(!roaring_bitmap_contains(without_edges, UINT32_MAX));
-    assert_true(roaring_bitmap_get_cardinality(with_edges) - 2 == roaring_bitmap_get_cardinality(without_edges));
+    assert_true(roaring_bitmap_get_cardinality(with_edges) - 2 ==
+                roaring_bitmap_get_cardinality(without_edges));
 
-    const roaring_bitmap_t* bitmaps[] = {with_edges, without_edges};
+    const roaring_bitmap_t *bitmaps[] = {with_edges, without_edges};
     int num_bitmaps = sizeof(bitmaps) / sizeof(bitmaps[0]);
 
-    for (int i = 0; i < num_bitmaps; ++i){
+    for (int i = 0; i < num_bitmaps; ++i) {
         roaring_uint32_iterator_t iterator;
         roaring_init_iterator(bitmaps[i], &iterator);
         assert_true(iterator.has_value);
@@ -4058,145 +4069,144 @@ void test_iterator_reuse_retry_count(int retry_count){
         assert_true(first_value == iterator.current_value);
     }
 
-
     roaring_bitmap_free(without_edges);
     roaring_bitmap_free(with_edges);
     free(ref_values);
 }
 
-DEFINE_TEST(test_iterator_reuse) {
-    test_iterator_reuse_retry_count(0);
-}
+DEFINE_TEST(test_iterator_reuse) { test_iterator_reuse_retry_count(0); }
 
-DEFINE_TEST(test_iterator_reuse_many) {
-    test_iterator_reuse_retry_count(10);
-}
+DEFINE_TEST(test_iterator_reuse_many) { test_iterator_reuse_retry_count(10); }
 
 DEFINE_TEST(test_add_range) {
     // autoconversion: BITSET -> BITSET -> RUN
     {
-      sbs_t* sbs = sbs_create();
-      sbs_add_value(sbs, 100);
-      sbs_convert(sbs, BITSET_CONTAINER_TYPE);
-      sbs_add_range(sbs, 0, 299);
-      assert_true(sbs_check_type(sbs, BITSET_CONTAINER_TYPE));
-      sbs_add_range(sbs, 301, 65535);
-      assert_true(sbs_check_type(sbs, BITSET_CONTAINER_TYPE));
-      // after and only after BITSET becomes [0, 65535], it is converted to RUN
-      sbs_add_range(sbs, 300, 300);
-      assert_true(sbs_check_type(sbs, RUN_CONTAINER_TYPE));
-      sbs_compare(sbs);
-      sbs_free(sbs);
+        sbs_t *sbs = sbs_create();
+        sbs_add_value(sbs, 100);
+        sbs_convert(sbs, BITSET_CONTAINER_TYPE);
+        sbs_add_range(sbs, 0, 299);
+        assert_true(sbs_check_type(sbs, BITSET_CONTAINER_TYPE));
+        sbs_add_range(sbs, 301, 65535);
+        assert_true(sbs_check_type(sbs, BITSET_CONTAINER_TYPE));
+        // after and only after BITSET becomes [0, 65535], it is converted to
+        // RUN
+        sbs_add_range(sbs, 300, 300);
+        assert_true(sbs_check_type(sbs, RUN_CONTAINER_TYPE));
+        sbs_compare(sbs);
+        sbs_free(sbs);
     }
 
     // autoconversion: ARRAY -> ARRAY -> BITSET
     {
-      sbs_t* sbs = sbs_create();
-      sbs_add_value(sbs, 100);
-      sbs_convert(sbs, ARRAY_CONTAINER_TYPE);
+        sbs_t *sbs = sbs_create();
+        sbs_add_value(sbs, 100);
+        sbs_convert(sbs, ARRAY_CONTAINER_TYPE);
 
-      // unless threshold was hit, it is still ARRAY
-      for (int i = 0; i < 100; i += 2) {
-        sbs_add_value(sbs, i);
-        assert_true(sbs_check_type(sbs, ARRAY_CONTAINER_TYPE));
-      }
+        // unless threshold was hit, it is still ARRAY
+        for (int i = 0; i < 100; i += 2) {
+            sbs_add_value(sbs, i);
+            assert_true(sbs_check_type(sbs, ARRAY_CONTAINER_TYPE));
+        }
 
-      // after threshold on number of elements was hit, it is converted to BITSET
-      for (int i = 0; i < 65535; i += 2) {
-        sbs_add_value(sbs, i);
-      }
-      assert_true(sbs_check_type(sbs, BITSET_CONTAINER_TYPE));
+        // after threshold on number of elements was hit, it is converted to
+        // BITSET
+        for (int i = 0; i < 65535; i += 2) {
+            sbs_add_value(sbs, i);
+        }
+        assert_true(sbs_check_type(sbs, BITSET_CONTAINER_TYPE));
 
-      sbs_compare(sbs);
-      sbs_free(sbs);
+        sbs_compare(sbs);
+        sbs_free(sbs);
     }
 
-     // autoconversion: ARRAY -> RUN
-     {
-      sbs_t* sbs = sbs_create();
-      sbs_add_range(sbs, 0, 100);
-      sbs_convert(sbs, ARRAY_CONTAINER_TYPE);
+    // autoconversion: ARRAY -> RUN
+    {
+        sbs_t *sbs = sbs_create();
+        sbs_add_range(sbs, 0, 100);
+        sbs_convert(sbs, ARRAY_CONTAINER_TYPE);
 
-      // after ARRAY becomes full [0, 65535], it is converted to RUN
-      sbs_add_range(sbs, 100, 65535);
-      assert_true(sbs_check_type(sbs, RUN_CONTAINER_TYPE));
+        // after ARRAY becomes full [0, 65535], it is converted to RUN
+        sbs_add_range(sbs, 100, 65535);
+        assert_true(sbs_check_type(sbs, RUN_CONTAINER_TYPE));
 
-      sbs_compare(sbs);
-      sbs_free(sbs);
+        sbs_compare(sbs);
+        sbs_free(sbs);
     }
     // autoconversion: RUN -> RUN -> BITSET
     {
-      sbs_t* sbs = sbs_create();
-      // by default, RUN container is used
-      for (int i = 0; i < 100; i += 2) {
-        sbs_add_range(sbs, 4*i, 4*i + 1);
-        assert_true(sbs_check_type(sbs, RUN_CONTAINER_TYPE));
-      }
-      // after number of RLE runs exceeded threshold, it is converted to BITSET
-      for (int i = 0; i < 65535; i += 2) {
-        sbs_add_range(sbs, i, i);
-      }
-      assert_true(sbs_check_type(sbs, BITSET_CONTAINER_TYPE));
-      sbs_compare(sbs);
-      sbs_free(sbs);
+        sbs_t *sbs = sbs_create();
+        // by default, RUN container is used
+        for (int i = 0; i < 100; i += 2) {
+            sbs_add_range(sbs, 4 * i, 4 * i + 1);
+            assert_true(sbs_check_type(sbs, RUN_CONTAINER_TYPE));
+        }
+        // after number of RLE runs exceeded threshold, it is converted to
+        // BITSET
+        for (int i = 0; i < 65535; i += 2) {
+            sbs_add_range(sbs, i, i);
+        }
+        assert_true(sbs_check_type(sbs, BITSET_CONTAINER_TYPE));
+        sbs_compare(sbs);
+        sbs_free(sbs);
     }
 
     // autoconversion: ARRAY -> ARRAY -> BITSET
     {
-      sbs_t* sbs = sbs_create();
-      for (int i = 0; i < 100; i += 2) {
-        sbs_add_range(sbs, i, i);
-        assert_true(sbs_check_type(sbs, ARRAY_CONTAINER_TYPE));
-      }
-      // after number of RLE runs exceeded threshold, it is converted to BITSET
-      for (int i = 0; i < 65535; i += 2) {
-        sbs_add_range(sbs, i, i);
-      }
-      assert_true(sbs_check_type(sbs, BITSET_CONTAINER_TYPE));
-      sbs_compare(sbs);
-      sbs_free(sbs);
+        sbs_t *sbs = sbs_create();
+        for (int i = 0; i < 100; i += 2) {
+            sbs_add_range(sbs, i, i);
+            assert_true(sbs_check_type(sbs, ARRAY_CONTAINER_TYPE));
+        }
+        // after number of RLE runs exceeded threshold, it is converted to
+        // BITSET
+        for (int i = 0; i < 65535; i += 2) {
+            sbs_add_range(sbs, i, i);
+        }
+        assert_true(sbs_check_type(sbs, BITSET_CONTAINER_TYPE));
+        sbs_compare(sbs);
+        sbs_free(sbs);
     }
 
     // append new container to the end
     {
-      sbs_t* sbs = sbs_create();
-      sbs_add_value(sbs, 5);
-      sbs_add_range(sbs, 65536+5, 65536+20);
-      sbs_compare(sbs);
-      sbs_free(sbs);
+        sbs_t *sbs = sbs_create();
+        sbs_add_value(sbs, 5);
+        sbs_add_range(sbs, 65536 + 5, 65536 + 20);
+        sbs_compare(sbs);
+        sbs_free(sbs);
     }
 
     // prepend new container to the beginning
     {
-      sbs_t* sbs = sbs_create();
-      sbs_add_value(sbs, 65536*1+5);
-      sbs_add_range(sbs, 5, 20);
-      sbs_compare(sbs);
-      sbs_free(sbs);
+        sbs_t *sbs = sbs_create();
+        sbs_add_value(sbs, 65536 * 1 + 5);
+        sbs_add_range(sbs, 5, 20);
+        sbs_compare(sbs);
+        sbs_free(sbs);
     }
 
     // add new container between existing ones
     {
-      sbs_t* sbs = sbs_create();
-      sbs_add_value(sbs, 65536*0+5);
-      sbs_add_value(sbs, 65536*2+5);
-      sbs_add_range(sbs, 65536*1+5, 65536*1+20);
-      sbs_compare(sbs);
-      sbs_free(sbs);
+        sbs_t *sbs = sbs_create();
+        sbs_add_value(sbs, 65536 * 0 + 5);
+        sbs_add_value(sbs, 65536 * 2 + 5);
+        sbs_add_range(sbs, 65536 * 1 + 5, 65536 * 1 + 20);
+        sbs_compare(sbs);
+        sbs_free(sbs);
     }
 
     // invalid range
     {
-      sbs_t* sbs = sbs_create();
-      sbs_add_range(sbs, 200, 100);
-      sbs_compare(sbs);
-      sbs_free(sbs);
+        sbs_t *sbs = sbs_create();
+        sbs_add_range(sbs, 200, 100);
+        sbs_compare(sbs);
+        sbs_free(sbs);
     }
 
     // random data inside [0..span)
-    const uint32_t span = 16*65536;
+    const uint32_t span = 16 * 65536;
     for (uint32_t range_length = 1; range_length < 16384; range_length *= 3) {
-        sbs_t* sbs = sbs_create();
+        sbs_t *sbs = sbs_create();
         for (int i = 0; i < 50; i++) {
             uint32_t value = our_rand() % span;
             sbs_add_value(sbs, value);
@@ -4302,27 +4312,32 @@ DEFINE_TEST(test_remove_range) {
     // remove containers
     {
         sbs_t *sbs = sbs_create();
-        sbs_add_value(sbs, 65536*1+100);
-        sbs_add_value(sbs, 65536*3+100);
-        sbs_add_value(sbs, 65536*5+100);
-        sbs_add_value(sbs, 65536*7+100);
-        sbs_remove_range(sbs, 65536*3+0, 65536*3+65535); // from the middle
+        sbs_add_value(sbs, 65536 * 1 + 100);
+        sbs_add_value(sbs, 65536 * 3 + 100);
+        sbs_add_value(sbs, 65536 * 5 + 100);
+        sbs_add_value(sbs, 65536 * 7 + 100);
+        sbs_remove_range(sbs, 65536 * 3 + 0,
+                         65536 * 3 + 65535);  // from the middle
         sbs_compare(sbs);
-        sbs_remove_range(sbs, 65536*1+0, 65536*1+65535); // from the beginning
+        sbs_remove_range(sbs, 65536 * 1 + 0,
+                         65536 * 1 + 65535);  // from the beginning
         sbs_compare(sbs);
-        sbs_remove_range(sbs, 65536*7+0, 65536*7+65535); // from the end
+        sbs_remove_range(sbs, 65536 * 7 + 0,
+                         65536 * 7 + 65535);  // from the end
         sbs_compare(sbs);
-        sbs_remove_range(sbs, 65536*5+0, 65536*5+65535); // the last one
+        sbs_remove_range(sbs, 65536 * 5 + 0,
+                         65536 * 5 + 65535);  // the last one
         sbs_compare(sbs);
-        sbs_remove_range(sbs, 65536*9+0, 65536*9+65535); // non-existent
+        sbs_remove_range(sbs, 65536 * 9 + 0,
+                         65536 * 9 + 65535);  // non-existent
         sbs_compare(sbs);
         sbs_free(sbs);
     }
 
     // random data inside [0..span)
-    const uint32_t span = 16*65536;
+    const uint32_t span = 16 * 65536;
     for (uint32_t range_length = 3; range_length <= 16384; range_length *= 3) {
-        sbs_t* sbs = sbs_create();
+        sbs_t *sbs = sbs_create();
         for (int i = 0; i < 50; i++) {
             uint64_t range_start = our_rand() % (span - range_length);
             sbs_add_range(sbs, range_start, range_start + range_length - 1);
@@ -4340,9 +4355,10 @@ DEFINE_TEST(test_remove_many) {
     // multiple values per container (sorted)
     {
         sbs_t *sbs = sbs_create();
-        sbs_add_range(sbs, 0, 65536*2-1);
-        uint32_t values[] = {1, 3, 5, 7, 65536+1, 65536+3, 65536+5, 65536+7};
-        sbs_remove_many(sbs, sizeof(values)/sizeof(values[0]), values);
+        sbs_add_range(sbs, 0, 65536 * 2 - 1);
+        uint32_t values[] = {1,         3,         5,         7,
+                             65536 + 1, 65536 + 3, 65536 + 5, 65536 + 7};
+        sbs_remove_many(sbs, sizeof(values) / sizeof(values[0]), values);
         sbs_compare(sbs);
         sbs_free(sbs);
     }
@@ -4350,9 +4366,10 @@ DEFINE_TEST(test_remove_many) {
     // multiple values per container (interleaved)
     {
         sbs_t *sbs = sbs_create();
-        sbs_add_range(sbs, 0, 65536*2-1);
-        uint32_t values[] = {65536+7, 65536+5, 7, 5, 1, 65536+1, 65536+3, 3};
-        sbs_remove_many(sbs, sizeof(values)/sizeof(values[0]), values);
+        sbs_add_range(sbs, 0, 65536 * 2 - 1);
+        uint32_t values[] = {65536 + 7, 65536 + 5, 7,         5,
+                             1,         65536 + 1, 65536 + 3, 3};
+        sbs_remove_many(sbs, sizeof(values) / sizeof(values[0]), values);
         sbs_compare(sbs);
         sbs_free(sbs);
     }
@@ -4361,9 +4378,9 @@ DEFINE_TEST(test_remove_many) {
     {
         sbs_t *sbs = sbs_create();
         sbs_add_value(sbs, 500);
-        uint32_t values[] = {501, 80000}; // non-existent value/container
-        sbs_remove_many(sbs, sizeof(values)/sizeof(values[0]), values);
-        sbs_remove_many(sbs, 0, NULL); // NULL ptr is not dereferenced
+        uint32_t values[] = {501, 80000};  // non-existent value/container
+        sbs_remove_many(sbs, sizeof(values) / sizeof(values[0]), values);
+        sbs_remove_many(sbs, 0, NULL);  // NULL ptr is not dereferenced
         sbs_compare(sbs);
         sbs_free(sbs);
     }
@@ -4374,51 +4391,57 @@ DEFINE_TEST(test_remove_many) {
         sbs_add_range(sbs, 0, 65535);
         for (uint32_t v = 0; v <= 65535; v++) {
             sbs_remove_many(sbs, 1, &v);
-            assert_true(roaring_bitmap_get_cardinality(sbs->roaring) == 65535-v);
+            assert_true(roaring_bitmap_get_cardinality(sbs->roaring) ==
+                        65535 - v);
         }
         assert_true(sbs_is_empty(sbs));
         sbs_free(sbs);
     }
-
 }
 
 DEFINE_TEST(test_range_cardinality) {
     const uint64_t s = 65536;
 
     roaring_bitmap_t *r = roaring_bitmap_create();
-    roaring_bitmap_add_range(r, s*2, s*10);
+    roaring_bitmap_add_range(r, s * 2, s * 10);
 
     // single container (minhb == maxhb)
-    assert_true(roaring_bitmap_range_cardinality(r, s*2, s*3) == s);
-    assert_true(roaring_bitmap_range_cardinality(r, s*2+100, s*3) == s-100);
-    assert_true(roaring_bitmap_range_cardinality(r, s*2, s*3-200) == s-200);
-    assert_true(roaring_bitmap_range_cardinality(r, s*2+100, s*3-200) == s-300);
+    assert_true(roaring_bitmap_range_cardinality(r, s * 2, s * 3) == s);
+    assert_true(roaring_bitmap_range_cardinality(r, s * 2 + 100, s * 3) ==
+                s - 100);
+    assert_true(roaring_bitmap_range_cardinality(r, s * 2, s * 3 - 200) ==
+                s - 200);
+    assert_true(roaring_bitmap_range_cardinality(r, s * 2 + 100, s * 3 - 200) ==
+                s - 300);
 
     // multiple containers (maxhb > minhb)
-    assert_true(roaring_bitmap_range_cardinality(r, s*2, s*5) == s*3);
-    assert_true(roaring_bitmap_range_cardinality(r, s*2+100, s*5) == s*3-100);
-    assert_true(roaring_bitmap_range_cardinality(r, s*2, s*5-200) == s*3-200);
-    assert_true(roaring_bitmap_range_cardinality(r, s*2+100, s*5-200) == s*3-300);
+    assert_true(roaring_bitmap_range_cardinality(r, s * 2, s * 5) == s * 3);
+    assert_true(roaring_bitmap_range_cardinality(r, s * 2 + 100, s * 5) ==
+                s * 3 - 100);
+    assert_true(roaring_bitmap_range_cardinality(r, s * 2, s * 5 - 200) ==
+                s * 3 - 200);
+    assert_true(roaring_bitmap_range_cardinality(r, s * 2 + 100, s * 5 - 200) ==
+                s * 3 - 300);
 
     // boundary checks
-    assert_true(roaring_bitmap_range_cardinality(r, s*20, s*21) == 0);
+    assert_true(roaring_bitmap_range_cardinality(r, s * 20, s * 21) == 0);
     assert_true(roaring_bitmap_range_cardinality(r, 100, 100) == 0);
-    assert_true(roaring_bitmap_range_cardinality(r, 0, s*7) == s*5);
-    assert_true(roaring_bitmap_range_cardinality(r, s*7, UINT64_MAX) == s*3);
+    assert_true(roaring_bitmap_range_cardinality(r, 0, s * 7) == s * 5);
+    assert_true(roaring_bitmap_range_cardinality(r, s * 7, UINT64_MAX) ==
+                s * 3);
 
     roaring_bitmap_free(r);
 }
 
 void frozen_serialization_compare(roaring_bitmap_t *r1) {
     size_t num_bytes = roaring_bitmap_frozen_size_in_bytes(r1);
-    char *buf = (char*)roaring_aligned_malloc(32, num_bytes);
+    char *buf = (char *)roaring_aligned_malloc(32, num_bytes);
     roaring_bitmap_frozen_serialize(r1, buf);
 
-    const roaring_bitmap_t *r2 =
-        roaring_bitmap_frozen_view(buf, num_bytes);
+    const roaring_bitmap_t *r2 = roaring_bitmap_frozen_view(buf, num_bytes);
 
     assert_true(roaring_bitmap_equals(r1, r2));
-    assert_true(roaring_bitmap_frozen_view(buf+1, num_bytes-1) == NULL);
+    assert_true(roaring_bitmap_frozen_view(buf + 1, num_bytes - 1) == NULL);
 
     roaring_bitmap_free(r1);
     roaring_bitmap_free(r2);
@@ -4435,12 +4458,12 @@ DEFINE_TEST(test_frozen_serialization) {
     roaring_bitmap_add(r, 2000);
     roaring_bitmap_add(r, 100000);
     roaring_bitmap_add(r, 200000);
-    roaring_bitmap_add_range(r, s*10 + 100, s*13 - 100);
-    for (uint64_t i = 0; i < s*3; i += 2) {
-        roaring_bitmap_add(r, s*20 + i);
+    roaring_bitmap_add_range(r, s * 10 + 100, s * 13 - 100);
+    for (uint64_t i = 0; i < s * 3; i += 2) {
+        roaring_bitmap_add(r, s * 20 + i);
     }
     roaring_bitmap_run_optimize(r);
-    //roaring_bitmap_printf_describe(r);
+    // roaring_bitmap_printf_describe(r);
     frozen_serialization_compare(r);
 }
 
@@ -4465,7 +4488,7 @@ DEFINE_TEST(test_portable_deserialize_frozen) {
         roaring_bitmap_add(r1, 3 * i);
 
     uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
-    char *serialized = (char*)malloc(expectedsize);
+    char *serialized = (char *)malloc(expectedsize);
     serialize_len = roaring_bitmap_portable_serialize(r1, serialized);
     assert_int_equal(serialize_len, expectedsize);
     r2 = roaring_bitmap_portable_deserialize_frozen(serialized);
@@ -4490,7 +4513,7 @@ DEFINE_TEST(test_portable_deserialize_frozen) {
     r1 = roaring_bitmap_of(6, 2946000, 2997491, 10478289, 10490227, 10502444,
                            19866827);
     expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
-    serialized = (char*)malloc(expectedsize);
+    serialized = (char *)malloc(expectedsize);
     serialize_len = roaring_bitmap_portable_serialize(r1, serialized);
     assert_int_equal(serialize_len, expectedsize);
     assert_int_equal(serialize_len, expectedsize);
@@ -4523,7 +4546,7 @@ DEFINE_TEST(test_portable_deserialize_frozen) {
 
     roaring_bitmap_run_optimize(r1);
     expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
-    serialized = (char*)malloc(expectedsize);
+    serialized = (char *)malloc(expectedsize);
     serialize_len = roaring_bitmap_portable_serialize(r1, serialized);
     assert_int_equal(serialize_len, expectedsize);
 
@@ -4549,23 +4572,23 @@ DEFINE_TEST(test_portable_deserialize_frozen) {
 
 DEFINE_TEST(convert_to_bitset) {
     roaring_bitmap_t *r1 = roaring_bitmap_create();
-    for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) {
-     roaring_bitmap_add(r1, i);
+    for (uint32_t i = 100; i < 100000; i += 1 + (i % 5)) {
+        roaring_bitmap_add(r1, i);
     }
-    for (uint32_t i = 100000; i < 500000; i+= 100) {
-     roaring_bitmap_add(r1, i);
+    for (uint32_t i = 100000; i < 500000; i += 100) {
+        roaring_bitmap_add(r1, i);
     }
     roaring_bitmap_add_range(r1, 500000, 600000);
-    bitset_t * bitset = bitset_create();
+    bitset_t *bitset = bitset_create();
     bool success = roaring_bitmap_to_bitset(r1, bitset);
-    assert_true(success); // could fail due to memory allocation.
+    assert_true(success);  // could fail due to memory allocation.
     assert_true(bitset_count(bitset) == roaring_bitmap_get_cardinality(r1));
     // You can then query the bitset:
-    for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) {
-        assert_true(bitset_get(bitset,i));
+    for (uint32_t i = 100; i < 100000; i += 1 + (i % 5)) {
+        assert_true(bitset_get(bitset, i));
     }
-    for (uint32_t i = 100000; i < 500000; i+= 100) {
-        assert_true(bitset_get(bitset,i));
+    for (uint32_t i = 100000; i < 500000; i += 100) {
+        assert_true(bitset_get(bitset, i));
     }
     // you must free the memory:
     bitset_free(bitset);

From f3fe3f6c9ac39f5546647662a3bb81d5494bc829 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 26 Sep 2023 16:16:53 -0400
Subject: [PATCH 159/162] Version bump

---
 CMakeLists.txt                    | 4 ++--
 doxygen                           | 2 +-
 include/roaring/roaring_version.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5df00fcdd..a3d505cbb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,8 +17,8 @@ endif()
 set(ROARING_LIB_NAME roaring)
 set(PROJECT_VERSION_MAJOR 2)
 set(PROJECT_VERSION_MINOR 0)
-set(PROJECT_VERSION_PATCH 0)
-set(ROARING_LIB_VERSION "2.0.0" CACHE STRING "Roaring library version")
+set(PROJECT_VERSION_PATCH 1)
+set(ROARING_LIB_VERSION "2.0.1" CACHE STRING "Roaring library version")
 set(ROARING_LIB_SOVERSION "13" CACHE STRING "Roaring library soversion")
 
 option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON)
diff --git a/doxygen b/doxygen
index 7adc253f4..571e70754 100644
--- a/doxygen
+++ b/doxygen
@@ -48,7 +48,7 @@ PROJECT_NAME           = "CRoaring"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = "2.0.0"
+PROJECT_NUMBER         = "2.0.1"
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
diff --git a/include/roaring/roaring_version.h b/include/roaring/roaring_version.h
index 95d997074..fbc83b2ec 100644
--- a/include/roaring/roaring_version.h
+++ b/include/roaring/roaring_version.h
@@ -1,10 +1,10 @@
 // /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand 
 #ifndef ROARING_INCLUDE_ROARING_VERSION 
 #define ROARING_INCLUDE_ROARING_VERSION 
-#define ROARING_VERSION "2.0.0"
+#define ROARING_VERSION "2.0.1"
 enum { 
     ROARING_VERSION_MAJOR = 2,
     ROARING_VERSION_MINOR = 0,
-    ROARING_VERSION_REVISION = 0
+    ROARING_VERSION_REVISION = 1
 }; 
 #endif // ROARING_INCLUDE_ROARING_VERSION 

From 0a5833d8fd73a036df826fc5546fa396c27b5f93 Mon Sep 17 00:00:00 2001
From: Gabriela Gutierrez <gabigutierrez@google.com>
Date: Tue, 26 Sep 2023 21:52:56 -0300
Subject: [PATCH 160/162] Create dependabot.yml (#511)

Signed-off-by: Gabriela Gutierrez <gabigutierrez@google.com>
---
 .github/dependabot.yml | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 .github/dependabot.yml

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 000000000..31e0e42d6
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,8 @@
+# Set update schedule for GitHub Actions
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "monthly"
+    open-pull-requests-limit: 5

From 5f55c3f2aa06ef184c01d0e848c0e31fceb9dbc3 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 26 Sep 2023 20:54:31 -0400
Subject: [PATCH 161/162] Update fuzzer to use the `bitmap` variable consistently

---
 fuzz/croaring_fuzzer.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fuzz/croaring_fuzzer.c b/fuzz/croaring_fuzzer.c
index 10a5391a5..1e62749a9 100644
--- a/fuzz/croaring_fuzzer.c
+++ b/fuzz/croaring_fuzzer.c
@@ -28,17 +28,17 @@ int LLVMFuzzerTestOneInput(const char *data, size_t size) {
         // The bitmap may not be usable if it does not follow the specification.
         // We can validate the bitmap we recovered to make sure it is proper.
         const char *reason_failure = NULL;
-        if (roaring_bitmap_internal_validate(t, &reason_failure)) {
+        if (roaring_bitmap_internal_validate(bitmap, &reason_failure)) {
             // the bitmap is ok!
-            uint32_t cardinality = roaring_bitmap_get_cardinality(t);
+            uint32_t cardinality = roaring_bitmap_get_cardinality(bitmap);
 
             for (uint32_t i = 100; i < 1000; i++) {
-                if (!roaring_bitmap_contains(t, i)) {
+                if (!roaring_bitmap_contains(bitmap, i)) {
                     cardinality++;
-                    roaring_bitmap_add(r1, i);
+                    roaring_bitmap_add(bitmap, i);
                 }
             }
-            uint32_t new_cardinality = roaring_bitmap_get_cardinality(t);
+            uint32_t new_cardinality = roaring_bitmap_get_cardinality(bitmap);
             if (cardinality != new_cardinality) {
                 printf("bug\n");
                 exit(1);

From 0b40505fbebdc043bc293bb0fb778194e9412376 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Wed, 27 Sep 2023 12:11:07 -0400
Subject: [PATCH 162/162] Caught a bug in roaring_bitmap_internal_validate
 (#516)

* Caught a bug in roaring_bitmap_internal_validate

* Putting back static.

* Comments.

---------

Co-authored-by: Daniel Lemire <dlemire@lemire.me>
---
 src/containers/run.c  |  5 ++++-
 tests/toplevel_unit.c | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/src/containers/run.c b/src/containers/run.c
index cbcd040d3..ddba08a3c 100644
--- a/src/containers/run.c
+++ b/src/containers/run.c
@@ -710,7 +710,10 @@ bool run_container_validate(const run_container_t *run, const char **reason) {
             *reason = "run start + length overflow";
             return false;
         }
-
+        if (end > (1<<16)) {
+            *reason = "run start + length too large";
+            return false;
+        }
         if (start < last_end) {
             *reason = "run start less than last end";
             return false;
diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c
index 532ee0721..50f299967 100644
--- a/tests/toplevel_unit.c
+++ b/tests/toplevel_unit.c
@@ -4595,10 +4595,48 @@ DEFINE_TEST(convert_to_bitset) {
     roaring_bitmap_free(r1);
 }
 
+
+// Deserializes `data`; if the bitmap validates, adds the missing values of
+// [100, 1000) and checks the cardinality grew by exactly that many.
+// Returns false only on a cardinality mismatch; the bitmap is always freed.
+bool deserialization_test(const char *data, size_t size) {
+    roaring_bitmap_t *bitmap =
+        roaring_bitmap_portable_deserialize_safe(data, size);
+    if (bitmap == NULL) {
+        return true;  // input rejected by the deserializer: nothing to check
+    }
+    bool consistent = true;
+    // A deserialized bitmap may still violate the format's invariants, so it
+    // is only exercised once it passes validation.
+    const char *reason_failure = NULL;
+    if (roaring_bitmap_internal_validate(bitmap, &reason_failure)) {
+        // 64-bit counter: roaring_bitmap_get_cardinality returns uint64_t.
+        uint64_t cardinality = roaring_bitmap_get_cardinality(bitmap);
+        for (uint32_t i = 100; i < 1000; i++) {
+            if (!roaring_bitmap_contains(bitmap, i)) {
+                cardinality++;
+                roaring_bitmap_add(bitmap, i);
+            }
+        }
+        consistent = (cardinality == roaring_bitmap_get_cardinality(bitmap));
+    }
+    // Free on every path, including the mismatch case.
+    roaring_bitmap_free(bitmap);
+    return consistent;
+}
+
+DEFINE_TEST(robust_deserialization) {
+    assert_true(deserialization_test(NULL, 0));  // empty input
+    // contains a run container that overflows the 16-bit boundary.
+    const char test1[] = "\x3b\x30\x00\x00\x01\x00\x00\xfa\x2e\x01\x00\x00\x02\xff\xff";
+    // "- 1": do not feed the string literal's terminating NUL as payload.
+    assert_true(deserialization_test(test1, sizeof(test1) - 1));
+}
+
 int main() {
     tellmeall();
 
     const struct CMUnitTest tests[] = {
+        cmocka_unit_test(robust_deserialization),
         cmocka_unit_test(issue457),
         cmocka_unit_test(convert_to_bitset),
         cmocka_unit_test(issue440),