diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..31e0e42d6 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,8 @@ +# Set update schedule for GitHub Actions +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" + open-pull-requests-limit: 5 diff --git a/.github/workflows/alpine.yml b/.github/workflows/alpine.yml index 95491609c..d01355c78 100644 --- a/.github/workflows/alpine.yml +++ b/.github/workflows/alpine.yml @@ -2,11 +2,13 @@ name: Alpine Linux 'on': - push - pull_request +permissions: + contents: read jobs: ubuntu-build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 - name: start docker run: | docker run -w /src -dit --name alpine -v $PWD:/src alpine:latest @@ -33,4 +35,4 @@ jobs: ./alpine.sh cmake --build build_for_alpine_debug - name: testdebug run: | - ./alpine.sh bash -c "cd build_for_alpine_debug && ctest" \ No newline at end of file + ./alpine.sh bash -c "cd build_for_alpine_debug && ctest" diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml new file mode 100644 index 000000000..9da72b1b9 --- /dev/null +++ b/.github/workflows/cifuzz.yml @@ -0,0 +1,26 @@ +name: CIFuzz +on: [pull_request] +permissions: + contents: read +jobs: + Fuzzing: + runs-on: ubuntu-latest + steps: + - name: Build Fuzzers + id: build + uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@d318097b285bc695f785b98d40c2d058c0f438b5 # master + with: + oss-fuzz-project-name: 'croaring' + dry-run: false + - name: Run Fuzzers + uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@d318097b285bc695f785b98d40c2d058c0f438b5 # master + with: + oss-fuzz-project-name: 'croaring' + fuzz-seconds: 300 + dry-run: false + - name: Upload Crash + uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2 + if: failure() && steps.build.outcome == 'success' + 
with: + name: artifacts + path: ./out/artifacts diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 000000000..082400aa8 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,44 @@ +name: "CodeQL" + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + schedule: + - cron: "39 2 * * 6" + +permissions: + contents: read + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ cpp, python ] + + steps: + - name: Checkout + uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # v3.5.2 + + - name: Initialize CodeQL + uses: github/codeql-action/init@f3feb00acb00f31a6f60280e6ace9ca31d91c76a # v2.3.2 + with: + languages: ${{ matrix.language }} + + - name: Autobuild + uses: github/codeql-action/autobuild@f3feb00acb00f31a6f60280e6ace9ca31d91c76a # v2.3.2 + if: ${{ matrix.language == 'cpp' || matrix.language == 'python' }} + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@f3feb00acb00f31a6f60280e6ace9ca31d91c76a # v2.3.2 + with: + category: "/language:${{ matrix.language }}" diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 000000000..f88d499ea --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,36 @@ +name: Doxygen GitHub Pages + +on: + push: + branches: + - master + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +permissions: + contents: read + +jobs: + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + permissions: + contents: write + pages: write + id-token: write + steps: + - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # v3.5.2 + - name: Install Doxygen + run: sudo apt-get install doxygen graphviz -y + - run: mkdir docs + - 
name: Install theme + run: ./tools/prepare_doxygen.sh + - name: Generate Doxygen Documentation + run: doxygen ./doxygen + - name: Deploy to GitHub Pages + uses: peaceiris/actions-gh-pages@373f7f263a76c20808c831209c920827a82a2847 # v3.9.3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/html diff --git a/.github/workflows/macos-ci.yml b/.github/workflows/macos-ci.yml index d926d172f..c97c5160f 100644 --- a/.github/workflows/macos-ci.yml +++ b/.github/workflows/macos-ci.yml @@ -4,13 +4,15 @@ name: Macos-CI - push - pull_request +permissions: + contents: read jobs: ci: name: macos-llvm runs-on: macos-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 - name: Build and Test run: | mkdir build @@ -20,6 +22,15 @@ jobs: ctest . --output-on-failure cmake --install . cd ../tests/installation/find && mkdir build && cd build && cmake -DCMAKE_INSTALL_PREFIX:PATH=../../../build/destination .. && cmake --build . 
+ - name: Build and Test (shared) + run: | + cmake -DBUILD_SHARED_LIBS=ON -B buildshared -DCMAKE_INSTALL_PREFIX:PATH=destinationshared + cmake --build buildshared + cmake --install buildshared + cd tests/installation/find + cmake -DCMAKE_INSTALL_PREFIX:PATH=../../../destinationshared -B buildshared + cmake --build buildshared + ./buildshared/repro - name: Build and Test Debug run: | mkdir builddebug diff --git a/.github/workflows/s390x.yml b/.github/workflows/s390x.yml new file mode 100644 index 000000000..c0045a235 --- /dev/null +++ b/.github/workflows/s390x.yml @@ -0,0 +1,33 @@ +name: Ubuntu s390x (GCC 11) + +on: + push: + branches: + - master + pull_request: + branches: + - master + +permissions: + contents: read + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@8e5e7e5ab8b370d6c329ec480221332ada57f0ab # v3.5.2 + - uses: uraimo/run-on-arch-action@a8003307a739516fdd80ee6d3da8924db811b8da # v2.5.0 + name: Test + id: runcmd + with: + arch: s390x + githubToken: ${{ github.token }} + distro: ubuntu_latest + install: | + apt-get update -q -y + apt-get install -y cmake make g++ + run: | + cmake -DCMAKE_BUILD_TYPE=Release -B build + cmake --build build -j=2 + ctest --output-on-failure --test-dir build + diff --git a/.github/workflows/ubuntu-ci.yml b/.github/workflows/ubuntu-ci.yml index 82956cca6..169124f04 100644 --- a/.github/workflows/ubuntu-ci.yml +++ b/.github/workflows/ubuntu-ci.yml @@ -4,6 +4,8 @@ name: Ubuntu-CI - push - pull_request +permissions: + contents: read jobs: ci: @@ -15,7 +17,7 @@ jobs: CXX: g++ steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 - name: Build and Test run: | mkdir build diff --git a/.github/workflows/ubuntu-debug-sani-ci.yml b/.github/workflows/ubuntu-debug-sani-ci.yml new file mode 100644 index 000000000..cbda35a0c --- /dev/null +++ b/.github/workflows/ubuntu-debug-sani-ci.yml @@ -0,0 +1,27 @@ +name: Ubuntu-Debug-Sanitized-CI + +'on': + - push 
+ - pull_request + +permissions: + contents: read + +jobs: + ci: + name: ubuntu-gcc + runs-on: ubuntu-latest + + env: + CC: gcc + CXX: g++ + + steps: + - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 + - name: Build and Test + run: | + mkdir build + cd build + cmake -DCMAKE_BUILD_TYPE=Debug -DROARING_SANITIZE=ON .. + cmake --build . + ctest . --output-on-failure diff --git a/.github/workflows/ubuntu-gcc10-ci.yml b/.github/workflows/ubuntu-gcc10-ci.yml index d617f4b43..f46bfb94f 100644 --- a/.github/workflows/ubuntu-gcc10-ci.yml +++ b/.github/workflows/ubuntu-gcc10-ci.yml @@ -4,6 +4,8 @@ name: Ubuntu-GCC10-CI - push - pull_request +permissions: + contents: read jobs: ci: @@ -13,7 +15,7 @@ jobs: CC: gcc-10 CXX: g++-10 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 - run: | sudo apt update sudo apt install gcc-10 g++-10 diff --git a/.github/workflows/ubuntu-legacy-ci.yml b/.github/workflows/ubuntu-legacy-ci.yml index fd13acabb..108a49d68 100644 --- a/.github/workflows/ubuntu-legacy-ci.yml +++ b/.github/workflows/ubuntu-legacy-ci.yml @@ -4,6 +4,8 @@ name: Ubuntu-CI - push - pull_request +permissions: + contents: read jobs: ci: @@ -11,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 - name: Build and Test run: | mkdir build diff --git a/.github/workflows/ubuntu-noexcept-ci.yml b/.github/workflows/ubuntu-noexcept-ci.yml index 1b5382000..889d41ed1 100644 --- a/.github/workflows/ubuntu-noexcept-ci.yml +++ b/.github/workflows/ubuntu-noexcept-ci.yml @@ -4,6 +4,8 @@ name: Ubuntu-CI - push - pull_request +permissions: + contents: read jobs: ci: @@ -15,7 +17,7 @@ jobs: CXX: g++ steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 - name: Build and Test run: | mkdir build diff --git 
a/.github/workflows/ubuntu-oldclang-18-ci.yml b/.github/workflows/ubuntu-oldclang-18-ci.yml deleted file mode 100644 index 8f6e50dde..000000000 --- a/.github/workflows/ubuntu-oldclang-18-ci.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: Ubuntu-CI (old llvm) - -'on': - - push - - pull_request - - -jobs: - ci: - name: ubuntu-clangold-gcc - runs-on: ubuntu-18.04 - - env: - CC: clang-7 - CXX: clang++-7 - - steps: - - uses: actions/checkout@v2 - - name: install clang 7 - run: | - sudo apt update - sudo apt install clang-7 - - name: Build and Test - run: | - mkdir build - cd build - cmake .. - cmake --build . - ctest . --output-on-failure diff --git a/.github/workflows/ubuntu-sani-ci.yml b/.github/workflows/ubuntu-sani-ci.yml index c955b7e85..c0f8608e3 100644 --- a/.github/workflows/ubuntu-sani-ci.yml +++ b/.github/workflows/ubuntu-sani-ci.yml @@ -4,6 +4,8 @@ name: Ubuntu-Sanitized-CI - push - pull_request +permissions: + contents: read jobs: ci: @@ -15,7 +17,7 @@ jobs: CXX: g++ steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 - name: Build and Test run: | mkdir build diff --git a/.github/workflows/ubuntu-18-ci.yml b/.github/workflows/ubuntu-sani-thread-ci.yml similarity index 55% rename from .github/workflows/ubuntu-18-ci.yml rename to .github/workflows/ubuntu-sani-thread-ci.yml index 554951fe2..067644aea 100644 --- a/.github/workflows/ubuntu-18-ci.yml +++ b/.github/workflows/ubuntu-sani-thread-ci.yml @@ -1,25 +1,27 @@ -name: Ubuntu-18-CI +name: Ubuntu-Sanitized-CI 'on': - push - pull_request +permissions: + contents: read jobs: ci: name: ubuntu-gcc - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest env: CC: gcc CXX: g++ steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 - name: Build and Test run: | mkdir build cd build - cmake .. + cmake -DROARING_SANITIZE_THREADS=ON .. cmake --build . ctest . 
--output-on-failure diff --git a/.github/workflows/vs16-arm-ci.yml b/.github/workflows/vs16-arm-ci.yml index 79017bd76..f68e049af 100644 --- a/.github/workflows/vs16-arm-ci.yml +++ b/.github/workflows/vs16-arm-ci.yml @@ -2,20 +2,27 @@ name: VS16-ARM-CI on: [push, pull_request] +permissions: + contents: read + jobs: ci: name: windows-vs16 - runs-on: windows-latest + runs-on: windows-2019 strategy: fail-fast: false matrix: include: - - {arch: ARM} - - {arch: ARM64} + - {gen: Visual Studio 16 2019, arch: ARM} + - {gen: Visual Studio 16 2019, arch: ARM64} steps: - name: checkout - uses: actions/checkout@v2 - - name: Use cmake + uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 + - name: Configure run: | - cmake -A ${{ matrix.arch }} -DCMAKE_CROSSCOMPILING=1 -B build && - cmake --build build --verbose + mkdir build + cd build && cmake -G "${{matrix.gen}}" -A ${{matrix.arch}} .. + - name: Build + run: cmake --build build --config Release + - name: Build Debug + run: cmake --build build --config Debug \ No newline at end of file diff --git a/.github/workflows/vs16-ci.yml b/.github/workflows/vs16-ci.yml index de40efc81..5eeb8b6df 100644 --- a/.github/workflows/vs16-ci.yml +++ b/.github/workflows/vs16-ci.yml @@ -2,24 +2,35 @@ name: VS16-CI on: [push, pull_request] +permissions: + contents: read + jobs: ci: name: windows-vs16 - runs-on: windows-latest + runs-on: windows-2019 + strategy: + fail-fast: false + matrix: + include: + - {gen: Visual Studio 16 2019, arch: Win32} + - {gen: Visual Studio 16 2019, arch: x64} steps: - - uses: actions/checkout@v2 - - name: 'Run CMake with VS16' - uses: lukka/run-cmake@v2 - with: - cmakeListsOrSettingsJson: CMakeListsTxtAdvanced - cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt' - buildDirectory: "${{ github.workspace }}/../../_temp/windows" - cmakeBuildType: Release - buildWithCMake: true - cmakeGenerator: VS16Win64 - cmakeAppendedArgs: -DROARING_BUILD_STATIC=ON - buildWithCMakeArgs: --config Release - - 
- name: 'Run CTest' - run: ctest --verbose - working-directory: "${{ github.workspace }}/../../_temp/windows" + - name: checkout + uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 + - name: Configure + run: | + mkdir build + cd build && cmake -G "${{matrix.gen}}" -A ${{matrix.arch}} .. + - name: Build + run: cmake --build build --config Release + - name: Run basic tests + run: | + cd build + ctest -C Release --output-on-failure + - name: Build Debug + run: cmake --build build --config Debug + - name: Run basic tests in Debug + run: | + cd build + ctest -C Debug --output-on-failure \ No newline at end of file diff --git a/.github/workflows/vs17-arm-ci.yml b/.github/workflows/vs17-arm-ci.yml index bb0532003..95ff1015c 100644 --- a/.github/workflows/vs17-arm-ci.yml +++ b/.github/workflows/vs17-arm-ci.yml @@ -2,6 +2,9 @@ name: VS17-ARM-CI on: [push, pull_request] +permissions: + contents: read + jobs: ci: name: windows-vs17 @@ -14,8 +17,8 @@ jobs: - {arch: ARM64} steps: - name: checkout - uses: actions/checkout@v2 + uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 - name: Use cmake run: | cmake -A ${{ matrix.arch }} -DCMAKE_CROSSCOMPILING=1 -B build && - cmake --build build --verbose \ No newline at end of file + cmake --build build --verbose diff --git a/.github/workflows/vs17-ci.yml b/.github/workflows/vs17-ci.yml index cc5b8451b..eaabb130f 100644 --- a/.github/workflows/vs17-ci.yml +++ b/.github/workflows/vs17-ci.yml @@ -2,6 +2,9 @@ name: VS17-CI on: [push, pull_request] +permissions: + contents: read + jobs: ci: name: windows-vs17 @@ -12,9 +15,9 @@ jobs: include: - {gen: Visual Studio 17 2022, arch: Win32} - {gen: Visual Studio 17 2022, arch: x64} - steps: + steps: - name: checkout - uses: actions/checkout@v2 + uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 - name: Configure run: | mkdir build @@ -24,4 +27,10 @@ jobs: - name: Run basic tests run: | cd build - ctest -C Release 
--output-on-failure \ No newline at end of file + ctest -C Release --output-on-failure + - name: Build Debug + run: cmake --build build --config Debug + - name: Run basic tests in Debug + run: | + cd build + ctest -C Debug --output-on-failure diff --git a/.github/workflows/vs17-clang-ci.yml b/.github/workflows/vs17-clang-ci.yml index 0f258a4dc..871de7eb7 100644 --- a/.github/workflows/vs17-clang-ci.yml +++ b/.github/workflows/vs17-clang-ci.yml @@ -2,6 +2,9 @@ name: VS17-CLANG-CI on: [push, pull_request] +permissions: + contents: read + jobs: ci: name: windows-vs17 @@ -14,7 +17,7 @@ jobs: - {gen: Visual Studio 17 2022, arch: x64} steps: - name: checkout - uses: actions/checkout@v2 + uses: actions/checkout@ee0669bd1cc54295c223e0bb666b733df41de1c5 # v2.7.0 - name: Configure run: | mkdir build @@ -24,4 +27,10 @@ jobs: - name: Run basic tests run: | cd build - ctest -C Release --output-on-failure \ No newline at end of file + ctest -C Release --output-on-failure + - name: Build Debug + run: cmake --build build --config Debug --parallel + - name: Run basic tests in Debug + run: | + cd build + ctest -C Debug --output-on-failure diff --git a/.gitignore b/.gitignore index 16a15f00d..656ac49c4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # Downloaded dependencies tests/vendor/cmocka +dependencies # Object files *.o diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d386aeab..a3d505cbb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,6 @@ project(RoaringBitmap ) include(GNUInstallDirs) -set(CMAKE_MACOSX_RPATH OFF) if (NOT CMAKE_BUILD_TYPE) message(STATUS "No build type selected, default to Release") set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." 
FORCE) @@ -16,11 +15,11 @@ if(CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSIO message(FATAL_ERROR "${PROJECT_NAME} requires at least apple-clang version 11 to support runtime dispatching.") endif() set(ROARING_LIB_NAME roaring) -set(PROJECT_VERSION_MAJOR 0) -set(PROJECT_VERSION_MINOR 6) -set(PROJECT_VERSION_PATCH 0) -set(ROARING_LIB_VERSION "0.6.0" CACHE STRING "Roaring library version") -set(ROARING_LIB_SOVERSION "4" CACHE STRING "Roaring library soversion") +set(PROJECT_VERSION_MAJOR 2) +set(PROJECT_VERSION_MINOR 0) +set(PROJECT_VERSION_PATCH 1) +set(ROARING_LIB_VERSION "2.0.1" CACHE STRING "Roaring library version") +set(ROARING_LIB_SOVERSION "13" CACHE STRING "Roaring library soversion") option(ROARING_EXCEPTIONS "Enable exception-throwing interface" ON) if(NOT ROARING_EXCEPTIONS) @@ -31,7 +30,7 @@ endif() option(ROARING_DISABLE_X64 "Forcefully disable x64 optimizations even if hardware supports it (this disables AVX)" OFF) option(ROARING_DISABLE_AVX "Forcefully disable AVX even if hardware supports it " OFF) option(ROARING_DISABLE_NEON "Forcefully disable NEON even if hardware supports it" OFF) -option(ROARING_DISABLE_NATIVE "Forcefully disable -march optimizations (obsolete)" OFF) +option(ROARING_DISABLE_AVX512 "Forcefully disable AVX512 even if compiler supports it" OFF) option(ROARING_BUILD_STATIC "Build a static library" ON) if(BUILD_SHARED_LIBS) @@ -44,6 +43,8 @@ option(ROARING_BUILD_LTO "Build library with Link Time Optimization" OFF) option(ROARING_BUILD_C_AS_CPP "Build library C files using C++ compilation" OFF) option(ROARING_BUILD_C_TESTS_AS_CPP "Build test C files using C++ compilation" OFF) option(ROARING_SANITIZE "Sanitize addresses" OFF) +option(ROARING_SANITIZE_THREADS "Sanitize threads" OFF) + option(ENABLE_ROARING_TESTS "If OFF, disable unit tests altogether" ON) set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/tools/cmake") @@ -62,7 +63,7 @@ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/roaring.pc" DESTINATION 
${CMAKE_INSTA add_library(roaring-headers INTERFACE) target_include_directories(roaring-headers INTERFACE - $ + $ $) add_library(roaring-headers-cpp INTERFACE) target_include_directories(roaring-headers-cpp INTERFACE @@ -73,11 +74,11 @@ target_include_directories(roaring-headers-cpp INTERFACE ### Some users want the C++ header files to be installed as well. ### C++ header files get installed to /usr/local/include/roaring typically SET(CPP_ROARING_HEADERS cpp/roaring64map.hh cpp/roaring.hh) # needs to be updated if we add more files -install(FILES ${CPP_ROARING_HEADERS} DESTINATION include/${ROARING_LIB_NAME}) -install(DIRECTORY include/${ROARING_LIB_NAME} DESTINATION include) +install(FILES ${CPP_ROARING_HEADERS} DESTINATION include/roaring) +install(DIRECTORY include/roaring DESTINATION include) install(TARGETS roaring-headers roaring-headers-cpp - EXPORT ${ROARING_LIB_NAME}-config + EXPORT roaring-config ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} @@ -98,9 +99,17 @@ configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/tests/config.h.in" add_subdirectory(src) if(ENABLE_ROARING_TESTS) - add_subdirectory(benchmarks) + if(CMAKE_SIZEOF_VOID_P EQUAL 8) # we only include the benchmarks on 64-bit systems. + add_subdirectory(benchmarks) + endif() add_subdirectory(tests) endif() +option(ENABLE_ROARING_MICROBENCHMARKS "Enable microbenchmarks" OFF) +if(ENABLE_ROARING_MICROBENCHMARKS) + add_subdirectory(microbenchmarks) +else() + MESSAGE( STATUS "You may enable microbenchmarks by setting ENABLE_ROARING_MICROBENCHMARKS to ON " ) +endif() # Being terse is good, but knowing how the build is configured is important # and should not be hard to figure out. MESSAGE( STATUS "CMAKE_SYSTEM_PROCESSOR: " ${CMAKE_SYSTEM_PROCESSOR}) diff --git a/LICENSE b/LICENSE index 8f567d348..8b0ad80d7 100644 --- a/LICENSE +++ b/LICENSE @@ -232,4 +232,5 @@ PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER \ No newline at end of file +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md index 4589a8cbd..4f88b185e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,11 @@ -# CRoaring [![Build status](https://ci.appveyor.com/api/projects/status/gr4ibsflqs9by1bc/branch/master?svg=true)](https://ci.appveyor.com/project/lemire/croaring/branch/master) [![Build Status](https://cloud.drone.io/api/badges/RoaringBitmap/CRoaring/status.svg)](https://cloud.drone.io/RoaringBitmap/CRoaring) +# CRoaring + +[![Ubuntu-CI](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml/badge.svg)](https://github.com/RoaringBitmap/CRoaring/actions/workflows/ubuntu-noexcept-ci.yml) [![VS17-CI](https://github.com/RoaringBitmap/CRoaring/actions/workflows/vs17-ci.yml/badge.svg)](https://github.com/RoaringBitmap/CRoaring/actions/workflows/vs17-ci.yml) +[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/croaring.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:croaring) + +[![Doxygen Documentation](https://img.shields.io/badge/docs-doxygen-green.svg)](http://roaringbitmap.github.io/CRoaring/) + + Portable Roaring bitmaps in C (and C++) with full support for your favorite compiler (GNU GCC, LLVM's clang, Visual Studio). Included in the [Awesome C](https://github.com/kozross/awesome-c) list of open source C software. @@ -9,7 +16,7 @@ Bitsets, also called bitmaps, are commonly used as fast data structures. Unfortu Roaring bitmaps are compressed bitmaps which tend to outperform conventional compressed bitmaps such as WAH, EWAH or Concise. 
They are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and -[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [InfluxDB](https://www.influxdata.com), [Pilosa][pilosa], [Bleve](http://www.blevesearch.com), [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The CRoaring library is used in several systems such as [Apache Doris](http://doris.incubator.apache.org). The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing. +[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [InfluxDB](https://www.influxdata.com), [Pilosa][pilosa], [Bleve](http://www.blevesearch.com), [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The CRoaring library is used in several systems such as [Apache Doris](http://doris.incubator.apache.org) and [StarRocks](https://github.com/StarRocks/starrocks). The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing. We published a peer-reviewed article on the design and evaluation of this library: @@ -50,10 +57,15 @@ of the latest hardware. Roaring bitmaps are already available on a variety of pl - Linux, macOS, FreeBSD, Windows (MSYS2 and Microsoft Visual studio). - We test the library with ARM, x64/x86 and POWER processors. We only support little endian systems (big endian systems are vanishingly rare). 
-- Recent C compiler supporting the C11 standard (GCC 7 or better, LLVM 7.0 or better, Xcode 11 or better), there is also an optional C++ class that requires a C++ compiler supporting the C++11 standard. +- Recent C compiler supporting the C11 standard (GCC 7 or better, LLVM 7.0 or better, Xcode 11 or better, Microsoft Visual Studio 2022 or better, Intel oneAPI Compiler 2023.2 or better), there is also an optional C++ class that requires a C++ compiler supporting the C++11 standard. - CMake (to contribute to the project, users can rely on amalgamation/unity builds if they do not wish to use CMake). - Under x64 systems, the library provides runtime dispatch so that optimized functions are called based on the detected CPU features. It works with GCC, clang (version 9 and up) and Visual Studio (2017 and up). Other systems (e.g., ARM) do not need runtime dispatch. +Hardly anyone has access to an actual big-endian system. Nevertheless, +we support big-endian systems such as IBM s390x through emulators---except for +IO serialization which is only supported on little-endian systems (see [issue 423](https://github.com/RoaringBitmap/CRoaring/issues/423)). + + # Using as a CMake dependency If you like CMake, you can just a few lines in you `CMakeLists.txt` file to grab a `CRoaring` release. [See our demonstration for further details](https://github.com/RoaringBitmap/croaring_cmake_demo_single_file). @@ -168,6 +180,43 @@ The C interface is found in the file ``include/roaring/roaring.h``. We have C++ Some users have to deal with large volumes of data. It may be important for these users to be aware of the `addMany` (C++) `roaring_bitmap_or_many` (C) functions as it is much faster and economical to add values in batches when possible. Furthermore, calling periodically the `runOptimize` (C++) or `roaring_bitmap_run_optimize` (C) functions may help. + +# Running microbenchmarks + +We have microbenchmarks constructed with the Google Benchmarks. 
+Under Linux or macOS, you may run them as follows: + +``` +cmake -B build -D ENABLE_ROARING_MICROBENCHMARKS=ON +cmake --build build +./build/microbenchmarks/bench +``` + +By default, the benchmark tool picks one data set (e.g., `CRoaring/benchmarks/realdata/census1881`). +We have several data sets and you may pick others: + +``` +./build/microbenchmarks/bench benchmarks/realdata/wikileaks-noquotes +``` + +You may disable some functionality for the purpose of benchmarking. For example, assuming you +have an x64 processor, you could benchmark the code without AVX-512 even if both your processor +and compiler support it: + +``` +cmake -B buildnoavx512 -D ROARING_DISABLE_AVX512=ON -D ENABLE_ROARING_MICROBENCHMARKS=ON +cmake --build buildnoavx512 +./buildnoavx512/microbenchmarks/bench +``` + +You can benchmark without AVX or AVX-512 as well: + +``` +cmake -B buildnoavx -D ROARING_DISABLE_AVX=ON -D ENABLE_ROARING_MICROBENCHMARKS=ON +cmake --build buildnoavx +./buildnoavx/microbenchmarks/bench +``` + # Custom memory allocators For general users, CRoaring would apply default allocator without extra codes. But global memory hook is also provided for those who want a custom memory allocator. Here is an example: ```C @@ -276,7 +325,15 @@ int main() { uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(r1); char *serializedbytes = malloc(expectedsize); roaring_bitmap_portable_serialize(r1, serializedbytes); - roaring_bitmap_t *t = roaring_bitmap_portable_deserialize(serializedbytes); + // Note: it is expected that the input follows the specification + // https://github.com/RoaringBitmap/RoaringFormatSpec + // otherwise the result may be unusable. 
+ roaring_bitmap_t *t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize); + if(t == NULL) { return EXIT_FAILURE; } + const char *reason = NULL; + if (!roaring_bitmap_internal_validate(t, &reason)) { + return EXIT_FAILURE; + } assert(roaring_bitmap_equals(r1, t)); // what we recover is equal roaring_bitmap_free(t); // we can also check whether there is a bitmap at a memory location without @@ -287,6 +344,21 @@ int main() { expectedsize); // sizeofbitmap would be zero if no bitmap were found // we can also read the bitmap "safely" by specifying a byte size limit: t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize); + if(t == NULL) { + printf("Problem during deserialization.\n"); + // We could clear any memory and close any file here. + return EXIT_FAILURE; + } + // We can validate the bitmap we recovered to make sure it is proper. + const char *reason_failure = NULL; + if (!roaring_bitmap_internal_validate(t, &reason_failure)) { + printf("safely deserialized invalid bitmap: %s\n", reason_failure); + // We could clear any memory and close any file here. + return EXIT_FAILURE; + } + // It is still necessary for the content of serializedbytes to follow + // the standard: https://github.com/RoaringBitmap/RoaringFormatSpec + // This is guaranteed when calling 'roaring_bitmap_portable_deserialize'. assert(roaring_bitmap_equals(r1, t)); // what we recover is equal roaring_bitmap_free(t); @@ -330,6 +402,77 @@ int main() { } ``` +# Conventional bitsets (C) + +We support conventional bitsets (uncompressed) as part of the library. + +Simple example: + +```C +bitset_t * b = bitset_create(); +bitset_set(b,10); +bitset_get(b,10);// returns true +bitset_free(b); // frees memory +``` + +More advanced example: + +```C + bitset_t *b = bitset_create(); + for (int k = 0; k < 1000; ++k) { + bitset_set(b, 3 * k); + } + // We have bitset_count(b) == 1000. 
+ // We have bitset_get(b, 3) is true + // You can iterate through the values: + size_t k = 0; + for (size_t i = 0; bitset_next_set_bit(b, &i); i++) { + // You will have i == k + k += 3; + } + // We support a wide range of operations on two bitsets such as + // bitset_inplace_symmetric_difference(b1,b2); + // bitset_inplace_symmetric_difference(b1,b2); + // bitset_inplace_difference(b1,b2);// should make no difference + // bitset_inplace_union(b1,b2); + // bitset_inplace_intersection(b1,b2); + // bitsets_disjoint + // bitsets_intersect +``` + +In some instances, you may want to convert a Roaring bitmap into a conventional (uncompressed) bitset. +Indeed, bitsets have advantages such as higher query performances in some cases. The following code +illustrates how you may do so: + +```C + roaring_bitmap_t *r1 = roaring_bitmap_create(); + for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) { + roaring_bitmap_add(r1, i); + } + for (uint32_t i = 100000; i < 500000; i+= 100) { + roaring_bitmap_add(r1, i); + } + roaring_bitmap_add_range(r1, 500000, 600000); + bitset_t * bitset = bitset_create(); + bool success = roaring_bitmap_to_bitset(r1, bitset); + assert(success); // could fail due to memory allocation. + assert(bitset_count(bitset) == roaring_bitmap_get_cardinality(r1)); + // You can then query the bitset: + for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) { + assert(bitset_get(bitset,i)); + } + for (uint32_t i = 100000; i < 500000; i+= 100) { + assert(bitset_get(bitset,i)); + } + // you must free the memory: + bitset_free(bitset); + roaring_bitmap_free(r1); +``` + +You should be aware that a conventional bitset (`bitset_t *`) may use much more +memory than a Roaring bitmap in some cases. You should run benchmarks to determine +whether the conversion to a bitset has performance benefits in your case. 
+ # Example (C++) @@ -372,6 +515,11 @@ int main() { r2.printf(); printf("\n"); + // create a new bitmap with initializer list + Roaring r2i = Roaring::bitmapOfList({1, 2, 3, 5, 6}); + + assert(r2i == r2); + // we can also create a bitmap from a pointer to 32-bit integers const uint32_t values[] = {2, 3, 4}; Roaring r3(3, values); @@ -406,7 +554,10 @@ int main() { uint32_t expectedsize = r1.getSizeInBytes(); char *serializedbytes = new char[expectedsize]; r1.write(serializedbytes); - Roaring t = Roaring::read(serializedbytes); + // readSafe will not overflow, but the resulting bitmap + // is only valid and usable if the input follows the + // Roaring specification: https://github.com/RoaringBitmap/RoaringFormatSpec/ + Roaring t = Roaring::readSafe(serializedbytes, expectedsize); assert(r1 == t); delete[] serializedbytes; @@ -477,14 +628,6 @@ ctest ``` -To run real-data benchmark - -``` -./real_bitmaps_benchmark ../benchmarks/realdata/census1881 -``` -where you must adjust the path "../benchmarks/realdata/census1881" so that it points to one of the directories in the benchmarks/realdata directory. - - To check that your code abides by the style convention (make sure that ``clang-format`` is installed): ``` @@ -517,7 +660,7 @@ To build with at least Visual Studio 2017 directly in the IDE: - For testing, in the Standard toolbar, drop the ``Select Startup Item...`` menu and choose one of the tests. Run the test by pressing the button to the left of the dropdown. -We have optimizations specific to AVX2 in the code, and they are turned dynamically based on the detected hardware at runtime. +We have optimizations specific to AVX2 and AVX-512 in the code, and they are turned dynamically based on the detected hardware at runtime. 
## Usage (Using `conan`) @@ -560,14 +703,25 @@ These commands will also print out instructions on how to use the library from M If you find the version of `roaring` shipped with `vcpkg` is out-of-date, feel free to report it to `vcpkg` community either by submiting an issue or by creating a PR. -# AVX2-related throttling +# SIMD-related throttling -Our AVX2 code does not use floating-point numbers or multiplications, so it is not subject to turbo frequency throttling on many-core Intel processors. +Our AVX2 code does not use floating-point numbers or multiplications, so it is not subject to turbo frequency throttling on many-core Intel processors. + +Our AVX-512 code is only enabled on recent hardware (Intel Ice Lake or better and AMD Zen 4) where SIMD-specific frequency throttling is not observed. # Thread safety Like, for example, STL containers or Java's default data structures, the CRoaring library has no built-in thread support. Thus whenever you modify a bitmap in one thread, it is unsafe to query it in others. It is safe however to query bitmaps (without modifying them) from several distinct threads, as long as you do not use the copy-on-write attribute. For example, you can safely copy a bitmap and use both copies in concurrently. One should probably avoid the use of the copy-on-write attribute in a threaded environment. +Some of our users rely on "copy-on-write" (disabled by default). A bitmap with the copy-on-write flag +set to true might generate shared containers. A shared container is just a reference to a single +container with reference counting (we keep track of the number of shallow copies). If you copy shared +containers over several threads, this might be unsafe due to the need to update the counter concurrently. +Thus for shared containers, we use reference counting with an atomic counter. If the library is compiled +as a C library (the default), we use C11 atomics. 
Unfortunately, Visual Studio does not support C11 +atomics at this time (though this is subject to change). To compensate, we +use Windows-specific code in such instances (`_InterlockedDecrement` and `_InterlockedIncrement`). + # How to best aggregate bitmaps? @@ -577,8 +731,8 @@ different strategies. You can use `roaring_bitmap_or_many(bitmapcount, bitmaps)` or `roaring_bitmap_or_many_heap(bitmapcount, bitmaps)` or you may even roll your own aggregation: -``` -roaring_bitmap_t *answer = roaring_bitmap_copy(bitmaps[0]); +```C +roaring_bitmap_t *answer = roaring_bitmap_copy(bitmaps[0]); for (size_t i = 1; i < bitmapcount; i++) { roaring_bitmap_or_inplace(answer, bitmaps[i]); } @@ -599,8 +753,9 @@ later `roaring_bitmap_or_inplace` will be very fast. You should benchmark these alternatives on your own data to decide what is best. -# Python Wrapper +# Wrappers +## Python Tom Cornebize wrote a Python wrapper available at https://github.com/Ezibenroc/PyRoaringBitMap Installing it is as easy as typing... @@ -608,7 +763,7 @@ Installing it is as easy as typing... pip install pyroaring ``` -# JavaScript Wrapper +## JavaScript Salvatore Previti wrote a Node/JavaScript wrapper available at https://github.com/SalvatorePreviti/roaring-node Installing it is as easy as typing... @@ -617,32 +772,35 @@ Installing it is as easy as typing... npm install roaring ``` -# Swift Wrapper +## Swift Jérémie Piotte wrote a [Swift wrapper](https://github.com/RoaringBitmap/SwiftRoaring). 
-# C# Wrapper +## C# Brandon Smith wrote a C# wrapper available at https://github.com/RogueException/CRoaring.Net (works for Windows and Linux under x64 processors) -# Go (golang) Wrapper +## Go (golang) There is a Go (golang) wrapper available at https://github.com/RoaringBitmap/gocroaring -# Rust Wrapper +## Rust Saulius Grigaliunas wrote a Rust wrapper available at https://github.com/saulius/croaring-rs -# D Wrapper +## D Yuce Tekol wrote a D wrapper available at https://github.com/yuce/droaring -# Redis Module +## Redis Antonio Guilherme Ferreira Viggiano wrote a Redis Module available at https://github.com/aviggiano/redis-roaring +## Zig + +Justin Whear wrote a Zig wrapper available at https://github.com/jwhear/roaring-zig # Mailing list/discussion group @@ -651,10 +809,9 @@ https://groups.google.com/forum/#!forum/roaring-bitmaps # References about Roaring -- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience (to appear) [arXiv:1709.07821](https://arxiv.org/abs/1709.07821) +- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience Volume 48, Issue 4 April 2018 Pages 867-895 [arXiv:1709.07821](https://arxiv.org/abs/1709.07821) - Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin, Better bitmap performance with Roaring bitmaps, -Software: Practice and Experience Volume 46, Issue 5, pages 709–719, May 2016 -http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/realroaring2014.html -- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience (accepted in 2016, to appear) http://arxiv.org/abs/1603.06549 +Software: Practice and Experience Volume 
46, Issue 5, pages 709–719, May 2016 [arXiv:1402.6407](http://arxiv.org/abs/1402.6407) +- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience Volume 46, Issue 11, pages 1547-1569, November 2016 [arXiv:1603.06549](http://arxiv.org/abs/1603.06549) - Samy Chambi, Daniel Lemire, Robert Godin, Kamel Boukhalfa, Charles Allen, Fangjin Yang, Optimizing Druid with Roaring bitmaps, IDEAS 2016, 2016. http://r-libre.teluq.ca/950/ diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..1d9c45c86 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,9 @@ +# Security Policy + +## Reporting a Vulnerability + +Please use the following contact information for reporting a vulnerability: + +- [Daniel Lemire]( https://www.teluq.ca/siteweb/univ/en/dlemire.html) - daniel@lemire.me + + diff --git a/amalgamation.sh b/amalgamation.sh index 1281384ef..545f1f7ed 100755 --- a/amalgamation.sh +++ b/amalgamation.sh @@ -5,7 +5,11 @@ ######################################################################## SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" -timestamp=$(date) # capture to label files with their generation time +case $SCRIPTPATH in + (*\ *) echo "Path ($SCRIPTPATH) cannot contain whitespace"; exit 1 ;; +esac + +timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ") # capture to label files with their generation time function newline { echo "" @@ -31,6 +35,8 @@ DEMOCPP="amalgamation_demo.cpp" ALL_PUBLIC_H=" $SCRIPTPATH/include/roaring/roaring_version.h $SCRIPTPATH/include/roaring/roaring_types.h +$SCRIPTPATH/include/roaring/portability.h +$SCRIPTPATH/include/roaring/bitset/bitset.h $SCRIPTPATH/include/roaring/roaring.h $SCRIPTPATH/include/roaring/memory.h " @@ -43,12 +49,11 @@ $SCRIPTPATH/cpp/roaring64map.hh " # internal .h files => These are used in the implementation but aren't part of -# the API. They're all embedded at the head of the amalgamated C file, and +# the API. 
They are all embedded at the head of the amalgamated C file, and # need to be in this order. # ALL_PRIVATE_H=" $SCRIPTPATH/include/roaring/isadetection.h -$SCRIPTPATH/include/roaring/portability.h $SCRIPTPATH/include/roaring/containers/perfparameters.h $SCRIPTPATH/include/roaring/containers/container_defs.h $SCRIPTPATH/include/roaring/array_util.h @@ -67,7 +72,6 @@ $SCRIPTPATH/include/roaring/containers/mixed_union.h $SCRIPTPATH/include/roaring/containers/mixed_xor.h $SCRIPTPATH/include/roaring/containers/containers.h $SCRIPTPATH/include/roaring/roaring_array.h -$SCRIPTPATH/include/roaring/misc/configreport.h " # .c implementation files @@ -76,13 +80,13 @@ $SCRIPTPATH/include/roaring/misc/configreport.h # has the definitions available from all the header files. Since the order of # the top level declarations doesn't matter after that point, the file list is # generated automatically from git-tracked C files in the /src/ directory. +# Sort them so every run uses the same order. # -ALL_PRIVATE_C=$( ( \ +ALL_PRIVATE_C=$( ( ( \ [ -d $SCRIPTPATH/.git ] \ && ( type git >/dev/null 2>&1 ) \ - && ( git ls-files $SCRIPTPATH/src/*.c $SCRIPTPATH/src/**/*c ) \ - ) || ( find $SCRIPTPATH/src -name '*.c' ) ) - + && ( git -C $SCRIPTPATH ls-files 'src/*.c' ) \ + ) || ( find $SCRIPTPATH/src -name '*.c' ) ) | sort ) # Verify up-front that all the files exist # for i in ${ALL_PUBLIC_H} ${ALL_PUBLIC_HH} ${ALL_PRIVATE_H} ${ALL_PRIVATE_C}; do @@ -166,13 +170,21 @@ echo "Creating ${DEMOC}..." 
cat <<< ' #include +#include #include "roaring.c" int main() { roaring_bitmap_t *r1 = roaring_bitmap_create(); for (uint32_t i = 100; i < 1000; i++) roaring_bitmap_add(r1, i); printf("cardinality = %d\n", (int) roaring_bitmap_get_cardinality(r1)); roaring_bitmap_free(r1); - return 0; + + bitset_t *b = bitset_create(); + for (int k = 0; k < 1000; ++k) { + bitset_set(b, 3 * k); + } + printf("%zu \n", bitset_count(b)); + bitset_free(b); + return EXIT_SUCCESS; } ' } > "${DEMOC}" @@ -242,10 +254,10 @@ CPPBIN=${DEMOCPP%%.*} echo "The interface is found in the file 'include/roaring/roaring.h'." newline echo "For C, try:" -echo "cc -march=native -O3 -std=c11 -o ${CBIN} ${DEMOC} && ./${CBIN} " +echo "cc -O3 -std=c11 -o ${CBIN} ${DEMOC} && ./${CBIN} " newline echo "For C++, try:" -echo "c++ -march=native -O3 -std=c++11 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} " +echo "c++ -O3 -std=c++11 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} " lowercase(){ echo "$1" | tr 'A-Z' 'a-z' @@ -257,8 +269,8 @@ newline echo "You can build a shared library with the following command:" if [ $OS == "darwin" ]; then - echo "cc -march=native -O3 -std=c11 -shared -o libroaring.dylib -fPIC roaring.c" + echo "cc -O3 -std=c11 -shared -o libroaring.dylib -fPIC roaring.c" else - echo "cc -march=native -O3 -std=c11 -shared -o libroaring.so -fPIC roaring.c" + echo "cc -O3 -std=c11 -shared -o libroaring.so -fPIC roaring.c" fi diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 0d5fabcaa..71be77ee7 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -12,6 +12,8 @@ if(NOT WIN32) add_c_benchmark(intersect_range_benchmark) target_link_libraries(add_benchmark m) add_c_benchmark(frozen_benchmark) + add_c_benchmark(containsmulti_benchmark) + add_cpp_benchmark(fastunion_benchmark) endif() add_c_benchmark(bitset_container_benchmark) add_c_benchmark(array_container_benchmark) diff --git a/benchmarks/add_benchmark.c b/benchmarks/add_benchmark.c index cc143e3dd..574b285c8 100644 --- 
a/benchmarks/add_benchmark.c +++ b/benchmarks/add_benchmark.c @@ -96,15 +96,33 @@ void run_test(uint32_t spanlen, uint32_t intvlen, double density, order_t order) printf(" %6.1f\n", array_min(results, num_passes)); printf(" roaring_bitmap_add_many():"); + for (int p = 0; p < num_passes; p++) { + roaring_bitmap_t *r = roaring_bitmap_create(); + uint32_t values[intvlen * count]; + for (int64_t i = 0; i < count; i++) { + for (uint32_t j = 0; j < intvlen; j++) { + values[i * intvlen + j] = offsets[i] + j; + } + } + RDTSC_START(cycles_start); + for (int64_t i = 0; i < count; i++) { + roaring_bitmap_add_many(r, intvlen, values + (i * intvlen)); + } + RDTSC_FINAL(cycles_final); + results[p] = (cycles_final - cycles_start) * 1.0 / count / intvlen; + roaring_bitmap_free(r); + } + printf(" %6.1f\n", array_min(results, num_passes)); + + printf(" roaring_bitmap_add_bulk():"); for (int p = 0; p < num_passes; p++) { roaring_bitmap_t *r = roaring_bitmap_create(); RDTSC_START(cycles_start); - uint32_t values[intvlen]; + roaring_bulk_context_t context = {0}; for (int64_t i = 0; i < count; i++) { for (uint32_t j = 0; j < intvlen; j++) { - values[j] = offsets[i] + j; + roaring_bitmap_add_bulk(r, &context, offsets[i] + j); } - roaring_bitmap_add_many(r, intvlen, values); } RDTSC_FINAL(cycles_final); results[p] = (cycles_final - cycles_start) * 1.0 / count / intvlen; diff --git a/benchmarks/array_container_benchmark.c b/benchmarks/array_container_benchmark.c index a76844ac9..fc6a2f196 100644 --- a/benchmarks/array_container_benchmark.c +++ b/benchmarks/array_container_benchmark.c @@ -23,7 +23,7 @@ void array_cache_flush(array_container_t* B) { (void)B; } // tries to put the array in cache void array_cache_prefetch(array_container_t* B) { #if !CROARING_REGULAR_VISUAL_STUDIO -#ifdef CROARING_IS_X64 +#if CROARING_IS_X64 const int32_t CACHELINESIZE = computecacheline(); // 64 bytes per cache line #else @@ -137,8 +137,8 @@ int main() { printf("intersection cardinality = %d \n", answer); 
BEST_TIME(intersection_test(B1, B2, BO), answer, repeat, answer); printf("==intersection and union test 2 \n"); - array_container_clear(B1); - array_container_clear(B2); + B1->cardinality = 0; + B2->cardinality = 0; for (int x = 0; x < 1 << 16; x += 16) { array_container_add(B1, (uint16_t)x); } diff --git a/benchmarks/benchmark.h b/benchmarks/benchmark.h index fee613fd9..e3a6ad166 100644 --- a/benchmarks/benchmark.h +++ b/benchmarks/benchmark.h @@ -37,69 +37,39 @@ (cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \ } while (0) -#elif defined(__linux__) && defined(__GLIBC__) - -#include -#ifdef CLOCK_THREAD_CPUTIME_ID -#define RDTSC_START(cycles) \ - do { \ - struct timespec ts; \ - clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); \ - cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \ - } while (0) - -#define RDTSC_FINAL(cycles) \ - do { \ - struct timespec ts; \ - clock_gettime(CLOCK_REALTIME, &ts); \ - cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \ - } while (0) - -#elif defined(CLOCK_REALTIME) // #ifdef CLOCK_THREAD_CPUTIME_ID -#define RDTSC_START(cycles) \ - do { \ - struct timespec ts; \ - clock_gettime(CLOCK_REALTIME, &ts); \ - cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \ - } while (0) - -#define RDTSC_FINAL(cycles) \ - do { \ - struct timespec ts; \ - clock_gettime(CLOCK_REALTIME, &ts); \ - cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \ - } while (0) - -#else -#define RDTSC_START(cycles) \ - do { \ - cycles = clock(); \ - } while(0) +#else // defined(CROARING_IS_X64) && defined(ROARING_INLINE_ASM) + +#if defined(CLOCK_THREAD_CPUTIME_ID) +#define RDTSC_CLOCK_ID CLOCK_THREAD_CPUTIME_ID +#elif defined(CLOCK_MONOTONIC) +#define RDTSC_CLOCK_ID CLOCK_MONOTONIC +#elif defined(CLOCK_REALTIME) +#define RDTSC_CLOCK_ID CLOCK_REALTIME +#endif -#define RDTSC_FINAL(cycles) \ - do { \ - cycles = clock(); \ - } while(0) +#if defined(RDTSC_CLOCK_ID) +#define RDTSC_START(cycles) \ + do { \ + struct timespec ts; \ + 
clock_gettime(RDTSC_CLOCK_ID, &ts); \ + cycles = ts.tv_sec * UINT64_C(1000000000) + ts.tv_nsec; \ + } while (0) -#endif // #ifdef CLOCK_THREAD_CPUTIME_ID +#define RDTSC_FINAL(cycles) RDTSC_START(cycles) -#else +#else // defined(RDTSC_CLOCK_ID) /** -* Other architectures do not support rdtsc ? +* Fall back to the `clock` function */ -#include - #define RDTSC_START(cycles) \ do { \ cycles = clock(); \ } while (0) -#define RDTSC_FINAL(cycles) \ - do { \ - cycles = clock(); \ - } while (0) +#define RDTSC_FINAL(cycles) RDTSC_START(cycles) +#endif #endif /* diff --git a/benchmarks/bitset_container_benchmark.c b/benchmarks/bitset_container_benchmark.c index 3d418ee2c..932398a97 100644 --- a/benchmarks/bitset_container_benchmark.c +++ b/benchmarks/bitset_container_benchmark.c @@ -29,7 +29,7 @@ void bitset_cache_flush(bitset_container_t* B) { (void)B; } // tries to put array of words in cache void bitset_cache_prefetch(bitset_container_t* B) { #if !CROARING_REGULAR_VISUAL_STUDIO -#ifdef CROARING_IS_X64 +#if CROARING_IS_X64 const int32_t CACHELINESIZE = computecacheline(); // 64 bytes per cache line #else @@ -69,7 +69,7 @@ int set_test(bitset_container_t* B) { int unset_test(bitset_container_t* B) { int x; for (x = 0; x < 1 << 16; x += 3) { - bitset_container_unset(B, (uint16_t)x); + bitset_container_remove(B, (uint16_t)x); } return 0; } diff --git a/benchmarks/containsmulti_benchmark.c b/benchmarks/containsmulti_benchmark.c new file mode 100644 index 000000000..e92d82e5a --- /dev/null +++ b/benchmarks/containsmulti_benchmark.c @@ -0,0 +1,121 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include "benchmark.h" +#include "random.h" +#include "numbersfromtextfiles.h" + +void contains_multi_via_contains(roaring_bitmap_t* bm, const uint32_t* values, bool* results, const size_t count) { + for (size_t i = 0; i < count; ++i) { + results[i] = roaring_bitmap_contains(bm, values[i]); + } +} + +void contains_multi_bulk(roaring_bitmap_t* bm, const uint32_t* 
values, bool* results, const size_t count) { + roaring_bulk_context_t context = {0}; + for (size_t i = 0; i < count; ++i) { + results[i] = roaring_bitmap_contains_bulk(bm, &context, values[i]); + } +} + +int compare_uint32(const void* a, const void* b) { + uint32_t arg1 = *(const uint32_t*)a; + uint32_t arg2 = *(const uint32_t*)b; + if (arg1 < arg2) return -1; + if (arg1 > arg2) return 1; + return 0; +} + +int main(int argc, char* argv[]) { + (void)&read_all_integer_files; // suppress unused warning + + if (argc < 2) { + printf("Usage: %s ...\n", argv[0]); + printf("Example: %s ~/CRoaring/benchmarks/realdata/weather_sept_85/*\n", argv[0]); + return 1; + } + + size_t fields = argc-1; + uint32_t* values[argc]; + size_t count[argc]; + + roaring_bitmap_t* bm = roaring_bitmap_create(); + for (int i = 1; i < argc; i++) { + size_t t_count = 0; + uint32_t* t_values = read_integer_file(argv[i], &t_count); + if (t_count == 0) { + printf("No integers found in %s\n", argv[i]); + return 1; + } + roaring_bitmap_add_many(bm, t_count, t_values); + + shuffle_uint32(t_values, t_count); + + values[i-1] = t_values; + count[i-1] = t_count; + } + //roaring_bitmap_run_optimize(bm); + + printf("Data:\n"); + printf(" cardinality: %"PRIu64"\n", roaring_bitmap_get_cardinality(bm)); + printf(" buckets: %d\n", (int)bm->high_low_container.size); + printf(" range: %"PRIu32"-%"PRIu32"\n", roaring_bitmap_minimum(bm) >> 16, roaring_bitmap_maximum(bm) >> 16); + + const int num_passes = 10; + printf("Cycles/element: %d\n", num_passes); + uint64_t cycles_start, cycles_final; + + printf(" roaring_bitmap_contains:"); + for (int p = 0; p < num_passes; p++) { + bool result[count[p]]; + RDTSC_START(cycles_start); + contains_multi_via_contains(bm, values[p], result, count[p]); + RDTSC_FINAL(cycles_final); + printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]); + } + printf("\n"); + + printf(" roaring_bitmap_contains_bulk:"); + for (int p = 0; p < num_passes; p++) { + bool result[count[p]]; + 
RDTSC_START(cycles_start); + contains_multi_bulk(bm, values[p], result, count[p]); + RDTSC_FINAL(cycles_final); + printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]); + } + printf("\n"); + + // sort input array + for (size_t i = 0; i < fields; ++i) { + qsort(values[i], count[i], sizeof(uint32_t), compare_uint32); + } + + printf(" roaring_bitmap_contains with sorted input:"); + for (int p = 0; p < num_passes; p++) { + bool result[count[p]]; + RDTSC_START(cycles_start); + contains_multi_via_contains(bm, values[p], result, count[p]); + RDTSC_FINAL(cycles_final); + printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]); + } + printf("\n"); + + printf(" roaring_bitmap_contains_bulk with sorted input:"); + for (int p = 0; p < num_passes; p++) { + bool result[count[p]]; + RDTSC_START(cycles_start); + contains_multi_bulk(bm, values[p], result, count[p]); + RDTSC_FINAL(cycles_final); + printf(" %10f", (cycles_final - cycles_start) * 1.0 / count[p]); + } + printf("\n"); + + roaring_bitmap_free(bm); + for (size_t i = 0; i < fields; ++i) { + free(values[i]); + } + return 0; +} diff --git a/benchmarks/fastunion_benchmark.cpp b/benchmarks/fastunion_benchmark.cpp new file mode 100644 index 000000000..ad693f6b8 --- /dev/null +++ b/benchmarks/fastunion_benchmark.cpp @@ -0,0 +1,101 @@ +#include +#include +#include +#include "roaring64map.hh" +#include "benchmark.h" + +using roaring::Roaring64Map; + +namespace { +const uint32_t num_iterations = 10; + +const uint32_t num_bitmaps = 100; +const uint32_t num_outer_slots = 1000; +const uint32_t num_inner_values = 2000; + +/** + * Creates the input maps for the test. This method creates 'num_bitmaps' maps, + * each of which contains 'num_outer_slots' 32-bit Roarings, each of which + * contains 'num_inner_values' bits. The inner bits are separated by + * 'num_bitmaps' and their starting offset is offset by 1 from one bitmap to the + * next. 
The intent is that in the result of the union, all the bits in a given + * 32 bit Roaring slot will end up densely packed together, which seemed like an + * interesting thing to do. + */ +std::vector makeMaps() { + std::vector result; + for (uint32_t bm_index = 0; bm_index != num_bitmaps; ++bm_index) { + Roaring64Map roaring; + + for (uint32_t slot = 0; slot != num_outer_slots; ++slot) { + auto value = (uint64_t(slot) << 32) + bm_index + 0x98765432; + for (uint32_t inner_index = 0; inner_index != num_inner_values; + ++inner_index) { + roaring.add(value); + value += num_bitmaps; + } + } + result.push_back(std::move(roaring)); + } + return result; +} + +Roaring64Map legacy_fastunion(size_t n, const Roaring64Map **inputs) { + Roaring64Map ans; + // not particularly fast + for (size_t lcv = 0; lcv < n; ++lcv) { + ans |= *(inputs[lcv]); + } + return ans; +} + +void benchmarkLegacyFastUnion() { + std::cout << "*** Legacy fastunion ***\n"; + auto maps = makeMaps(); + + // Need pointers to the above + std::vector result_ptrs; + for (auto &map : maps) { + result_ptrs.push_back(&map); + } + + for (uint32_t iter = 0; iter < num_iterations; ++iter) { + uint64_t cycles_start, cycles_final; + RDTSC_START(cycles_start); + auto result = legacy_fastunion(result_ptrs.size(), result_ptrs.data()); + RDTSC_FINAL(cycles_final); + + auto num_cycles = cycles_final - cycles_start; + uint64_t cycles_per_map = num_cycles / maps.size(); + std::cout << "Iteration " << iter << ": " << cycles_per_map << " per map\n"; + } +} + +void benchmarkNewFastUnion() { + std::cout << "*** New fastunion() ***\n"; + auto maps = makeMaps(); + + // Need pointers to the above + std::vector result_ptrs; + for (auto &map : maps) { + result_ptrs.push_back(&map); + } + + for (uint32_t iter = 0; iter < num_iterations; ++iter) { + uint64_t cycles_start, cycles_final; + RDTSC_START(cycles_start); + auto result = + Roaring64Map::fastunion(result_ptrs.size(), result_ptrs.data()); + RDTSC_FINAL(cycles_final); + + auto 
num_cycles = cycles_final - cycles_start; + uint64_t cycles_per_map = num_cycles / maps.size(); + std::cout << "Iteration " << iter << ": " << cycles_per_map << " per map\n"; + } +} +} // namespace + +int main() { + benchmarkLegacyFastUnion(); + benchmarkNewFastUnion(); +} diff --git a/benchmarks/real_bitmaps_benchmark.c b/benchmarks/real_bitmaps_benchmark.c index 87840a926..7d8c4847e 100644 --- a/benchmarks/real_bitmaps_benchmark.c +++ b/benchmarks/real_bitmaps_benchmark.c @@ -174,6 +174,46 @@ int main(int argc, char **argv) { " cycles\n", count, total_count, cycles_final - cycles_start); + uint64_t portable_cycle_count = 0, portable_frozen_cycle_count = 0, + frozen_cycle_count = 0; + for(int i = 0; i < (int)count; i++) { + int size = roaring_bitmap_portable_size_in_bytes(bitmaps[i]); + char *buf = malloc(size); + roaring_bitmap_portable_serialize(bitmaps[i], buf); + + int frozen_size = roaring_bitmap_frozen_size_in_bytes(bitmaps[i]); + char *frozen_buf = roaring_aligned_malloc(32, frozen_size); + roaring_bitmap_frozen_serialize(bitmaps[i], frozen_buf); + + RDTSC_START(cycles_start); + roaring_bitmap_t *r1 = roaring_bitmap_portable_deserialize(buf); + RDTSC_FINAL(cycles_final); + portable_cycle_count += cycles_final - cycles_start; + + RDTSC_START(cycles_start); + roaring_bitmap_t *r2 = roaring_bitmap_portable_deserialize_frozen(buf); + RDTSC_FINAL(cycles_final); + portable_frozen_cycle_count += cycles_final - cycles_start; + + RDTSC_START(cycles_start); + const roaring_bitmap_t *r3 = roaring_bitmap_frozen_view(frozen_buf, frozen_size); + RDTSC_FINAL(cycles_final); + frozen_cycle_count += cycles_final - cycles_start; + + roaring_bitmap_free(r1); + roaring_bitmap_free(r2); + roaring_bitmap_free(r3); + free(buf); + roaring_aligned_free(frozen_buf); + } + + printf("Deserializing %zu bitmaps took %" PRIu64 " cycles for portable format\n", + count, portable_cycle_count); + printf("Deserializing %zu bitmaps took %" PRIu64 " cycles for portable frozen format\n", + count, 
portable_frozen_cycle_count); + printf("Deserializing %zu bitmaps took %" PRIu64 " cycles for frozen format\n", + count, frozen_cycle_count); + for (int i = 0; i < (int)count; ++i) { free(numbers[i]); numbers[i] = NULL; // paranoid diff --git a/benchmarks/run_container_benchmark.c b/benchmarks/run_container_benchmark.c index b04170257..e2fdc495a 100644 --- a/benchmarks/run_container_benchmark.c +++ b/benchmarks/run_container_benchmark.c @@ -23,7 +23,7 @@ void run_cache_flush(run_container_t* B) { (void)B; } // tries to put array in cache void run_cache_prefetch(run_container_t* B) { #if !CROARING_REGULAR_VISUAL_STUDIO -#ifdef CROARING_IS_X64 +#if CROARING_IS_X64 const int32_t CACHELINESIZE = computecacheline(); // 64 bytes per cache line #else @@ -141,8 +141,8 @@ int main() { printf("intersection cardinality = %d \n", answer); BEST_TIME(intersection_test(B1, B2, BO), answer, repeat, answer); printf("==intersection and union test 2 \n"); - run_container_clear(B1); - run_container_clear(B2); + B1->n_runs = 0; + B2->n_runs = 0; for (int x = 0; x < (1 << 16); x += 64) { int length = x % 11; for (int y = 0; y < length; ++y) diff --git a/cmake/import.cmake b/cmake/import.cmake new file mode 100644 index 000000000..a9b6ffe5d --- /dev/null +++ b/cmake/import.cmake @@ -0,0 +1,52 @@ +set(dep_root "${PROJEC_SOURCE_DIR}/dependencies/.cache") +if(DEFINED ENV{roaring_DEPENDENCY_CACHE_DIR}) + set(dep_root "$ENV{roaring_DEPENDENCY_CACHE_DIR}") +endif() + +function(import_dependency NAME GITHUB_REPO COMMIT) + message(STATUS "Importing ${NAME} (${GITHUB_REPO}@${COMMIT})") + set(target "${dep_root}/${NAME}") + + # If the folder exists in the cache, then we assume that everything is as + # should be and do nothing + if(EXISTS "${target}") + set("${NAME}_SOURCE_DIR" "${target}" PARENT_SCOPE) + return() + endif() + + set(zip_url "https://github.com/${GITHUB_REPO}/archive/${COMMIT}.zip") + set(archive "${dep_root}/archive.zip") + set(dest "${dep_root}/_extract") + + file(DOWNLOAD 
"${zip_url}" "${archive}") + file(MAKE_DIRECTORY "${dest}") + execute_process( + WORKING_DIRECTORY "${dest}" + COMMAND "${CMAKE_COMMAND}" -E tar xf "${archive}") + file(REMOVE "${archive}") + + # GitHub archives only ever have one folder component at the root, so this + # will always match that single folder + file(GLOB dir LIST_DIRECTORIES YES "${dest}/*") + + file(RENAME "${dir}" "${target}") + + set("${NAME}_SOURCE_DIR" "${target}" PARENT_SCOPE) +endfunction() + +# Delegates to the dependency +macro(add_dependency NAME) + if(NOT DEFINED "${NAME}_SOURCE_DIR") + message(FATAL_ERROR "Missing ${NAME}_SOURCE_DIR variable") + endif() + + add_subdirectory("${${NAME}_SOURCE_DIR}" "${PROJECT_BINARY_DIR}/_deps/${NAME}" EXCLUDE_FROM_ALL) +endmacro() + +function(set_off NAME) + set("${NAME}" OFF CACHE INTERNAL "") +endfunction() + +function(set_on NAME) + set("${NAME}" ON CACHE INTERNAL "") +endfunction() \ No newline at end of file diff --git a/cpp/roaring.hh b/cpp/roaring.hh index 8ee9e7b30..f14150baf 100644 --- a/cpp/roaring.hh +++ b/cpp/roaring.hh @@ -7,6 +7,7 @@ A C++ header for Roaring Bitmaps. #include #include +#include #include #include #include @@ -41,6 +42,28 @@ namespace roaring { class RoaringSetBitForwardIterator; +/** + * A bit of context usable with `*Bulk()` functions. + * + * A context may only be used with a single bitmap, and any modification to a bitmap + * (other than modifications performed with `Bulk()` functions with the context + * passed) will invalidate any contexts associated with that bitmap. 
+ */ +class BulkContext { + public: + friend class Roaring; + using roaring_bitmap_bulk_context_t = api::roaring_bulk_context_t; + BulkContext() : context_{nullptr, 0, 0, 0} {} + + BulkContext(const BulkContext&) = delete; + BulkContext& operator=(const BulkContext&) = delete; + BulkContext(BulkContext&&) noexcept = default; + BulkContext& operator=(BulkContext&&) noexcept = default; + + private: + roaring_bitmap_bulk_context_t context_; +}; + class Roaring { typedef api::roaring_bitmap_t roaring_bitmap_t; // class-local name alias @@ -55,14 +78,22 @@ public: } /** - * Construct a bitmap from a list of integer values. + * Construct a bitmap from a list of 32-bit integer values. */ Roaring(size_t n, const uint32_t *data) : Roaring() { api::roaring_bitmap_add_many(&roaring, n, data); } /** - * Copy constructor + * Construct a bitmap from an initializer list. + */ + Roaring(std::initializer_list l) : Roaring() { + addMany(l.size(), l.begin()); + } + + /** + * Copy constructor. + * It may throw std::runtime_error if there is insufficient memory. */ Roaring(const Roaring &r) : Roaring() { if (!api::roaring_bitmap_overwrite(&roaring, &r.roaring)) { @@ -74,8 +105,8 @@ public: } /** - * Move constructor. The moved object remains valid, i.e. - * all methods can still be called on it. + * Move constructor. The moved-from object remains valid but empty, i.e. + * it behaves as though it was just freshly constructed. */ Roaring(Roaring &&r) noexcept : roaring(r.roaring) { // @@ -99,7 +130,7 @@ public: } /** - * Construct a bitmap from a list of integer values. + * Construct a bitmap from a list of uint32_t values. */ static Roaring bitmapOf(size_t n, ...) { Roaring ans; @@ -112,69 +143,124 @@ public: return ans; } + /** + * Construct a bitmap from a list of uint32_t values. + * E.g., bitmapOfList({1,2,3}). 
+ */ + static Roaring bitmapOfList(std::initializer_list l) { + Roaring ans; + ans.addMany(l.size(), l.begin()); + return ans; + } + /** * Add value x */ - void add(uint32_t x) { api::roaring_bitmap_add(&roaring, x); } + void add(uint32_t x) noexcept { api::roaring_bitmap_add(&roaring, x); } /** * Add value x * Returns true if a new value was added, false if the value was already * existing. */ - bool addChecked(uint32_t x) { + bool addChecked(uint32_t x) noexcept { return api::roaring_bitmap_add_checked(&roaring, x); } /** - * Add all values from x (included) to y (excluded) + * Add all values in range [min, max) */ - void addRange(const uint64_t x, const uint64_t y) { - return api::roaring_bitmap_add_range(&roaring, x, y); + void addRange(const uint64_t min, const uint64_t max) noexcept { + return api::roaring_bitmap_add_range(&roaring, min, max); + } + + /** + * Add all values in range [min, max] + */ + void addRangeClosed(const uint32_t min, const uint32_t max) noexcept { + return api::roaring_bitmap_add_range_closed(&roaring, min, max); } /** * Add value n_args from pointer vals */ - void addMany(size_t n_args, const uint32_t *vals) { + void addMany(size_t n_args, const uint32_t *vals) noexcept { api::roaring_bitmap_add_many(&roaring, n_args, vals); } + /** + * Add value val, using context from a previous insert for speed + * optimization. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `context` should be default-initialized before the + * first call to this function. + */ + void addBulk(BulkContext &context, uint32_t x) noexcept { + api::roaring_bitmap_add_bulk(&roaring, &context.context_, x); + } + + /** + * Check if item x is present, using context from a previous insert or search + * for speed optimization. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `context` should be default-initialized before the + * first call to this function. 
+ */ + bool containsBulk(BulkContext& context, uint32_t x) const noexcept { + return api::roaring_bitmap_contains_bulk(&roaring, &context.context_, x); + } + /** * Remove value x */ - void remove(uint32_t x) { api::roaring_bitmap_remove(&roaring, x); } + void remove(uint32_t x) noexcept { api::roaring_bitmap_remove(&roaring, x); } /** * Remove value x * Returns true if a new value was removed, false if the value was not * existing. */ - bool removeChecked(uint32_t x) { + bool removeChecked(uint32_t x) noexcept { return api::roaring_bitmap_remove_checked(&roaring, x); } + /** + * Remove all values in range [min, max) + */ + void removeRange(uint64_t min, uint64_t max) noexcept { + return api::roaring_bitmap_remove_range(&roaring, min, max); + } + + /** + * Remove all values in range [min, max] + */ + void removeRangeClosed(uint32_t min, uint32_t max) noexcept { + return api::roaring_bitmap_remove_range_closed(&roaring, min, max); + } + /** * Return the largest value (if not empty) */ - uint32_t maximum() const { return api::roaring_bitmap_maximum(&roaring); } + uint32_t maximum() const noexcept { return api::roaring_bitmap_maximum(&roaring); } /** * Return the smallest value (if not empty) */ - uint32_t minimum() const { return api::roaring_bitmap_minimum(&roaring); } + uint32_t minimum() const noexcept { return api::roaring_bitmap_minimum(&roaring); } /** * Check if value x is present */ - bool contains(uint32_t x) const { + bool contains(uint32_t x) const noexcept { return api::roaring_bitmap_contains(&roaring, x); } /** * Check if all values from x (included) to y (excluded) are present */ - bool containsRange(const uint64_t x, const uint64_t y) const { + bool containsRange(const uint64_t x, const uint64_t y) const noexcept { return api::roaring_bitmap_contains_range(&roaring, x, y); } @@ -202,6 +288,7 @@ public: /** * Copies the content of the provided bitmap, and * discard the current content. + * It may throw std::runtime_error if there is insufficient memory. 
*/ Roaring &operator=(const Roaring &r) { if (!api::roaring_bitmap_overwrite(&roaring, &r.roaring)) { @@ -228,12 +315,24 @@ public: return *this; } + /** + * Assignment from an initializer list. + */ + Roaring &operator=(std::initializer_list l) { + // Delegate to move assignment operator + *this = Roaring(l); + return *this; + } + /** * Compute the intersection between the current bitmap and the provided * bitmap, writing the result in the current bitmap. The provided bitmap * is not modified. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmap. */ - Roaring &operator&=(const Roaring &r) { + Roaring &operator&=(const Roaring &r) noexcept { api::roaring_bitmap_and_inplace(&roaring, &r.roaring); return *this; } @@ -243,7 +342,7 @@ public: * bitmap, writing the result in the current bitmap. The provided bitmap * is not modified. */ - Roaring &operator-=(const Roaring &r) { + Roaring &operator-=(const Roaring &r) noexcept { api::roaring_bitmap_andnot_inplace(&roaring, &r.roaring); return *this; } @@ -255,7 +354,7 @@ public: * * See also the fastunion function to aggregate many bitmaps more quickly. */ - Roaring &operator|=(const Roaring &r) { + Roaring &operator|=(const Roaring &r) noexcept { api::roaring_bitmap_or_inplace(&roaring, &r.roaring); return *this; } @@ -265,7 +364,7 @@ public: * bitmap, writing the result in the current bitmap. The provided bitmap * is not modified. */ - Roaring &operator^=(const Roaring &r) { + Roaring &operator^=(const Roaring &r) noexcept { api::roaring_bitmap_xor_inplace(&roaring, &r.roaring); return *this; } @@ -273,31 +372,31 @@ public: /** * Exchange the content of this bitmap with another. */ - void swap(Roaring &r) { std::swap(r.roaring, roaring); } + void swap(Roaring &r) noexcept { std::swap(r.roaring, roaring); } /** * Get the cardinality of the bitmap (number of elements). 
*/ - uint64_t cardinality() const { + uint64_t cardinality() const noexcept { return api::roaring_bitmap_get_cardinality(&roaring); } /** * Returns true if the bitmap is empty (cardinality is zero). */ - bool isEmpty() const { return api::roaring_bitmap_is_empty(&roaring); } + bool isEmpty() const noexcept { return api::roaring_bitmap_is_empty(&roaring); } /** * Returns true if the bitmap is subset of the other. */ - bool isSubset(const Roaring &r) const { + bool isSubset(const Roaring &r) const noexcept { return api::roaring_bitmap_is_subset(&roaring, &r.roaring); } /** * Returns true if the bitmap is strict subset of the other. */ - bool isStrictSubset(const Roaring &r) const { + bool isStrictSubset(const Roaring &r) const noexcept { return api::roaring_bitmap_is_strict_subset(&roaring, &r.roaring); } @@ -306,37 +405,45 @@ public: * responsible to ensure that there is enough memory allocated * (e.g., ans = new uint32[mybitmap.cardinality()];) */ - void toUint32Array(uint32_t *ans) const { + void toUint32Array(uint32_t *ans) const noexcept { api::roaring_bitmap_to_uint32_array(&roaring, ans); } /** * To int array with pagination */ - void rangeUint32Array(uint32_t *ans, size_t offset, size_t limit) const { + void rangeUint32Array(uint32_t *ans, size_t offset, size_t limit) const noexcept { api::roaring_bitmap_range_uint32_array(&roaring, offset, limit, ans); } /** * Return true if the two bitmaps contain the same elements. */ - bool operator==(const Roaring &r) const { + bool operator==(const Roaring &r) const noexcept { return api::roaring_bitmap_equals(&roaring, &r.roaring); } /** - * Compute the negation of the roaring bitmap within a specified interval. - * interval: [range_start, range_end). - * Areas outside the range are passed through unchanged. + * Compute the negation of the roaring bitmap within the half-open interval + * [range_start, range_end). Areas outside the interval are unchanged. 
*/ - void flip(uint64_t range_start, uint64_t range_end) { + void flip(uint64_t range_start, uint64_t range_end) noexcept { api::roaring_bitmap_flip_inplace(&roaring, range_start, range_end); } + /** + * Compute the negation of the roaring bitmap within the closed interval + * [range_start, range_end]. Areas outside the interval are unchanged. + */ + void flipClosed(uint32_t range_start, uint32_t range_end) noexcept { + api::roaring_bitmap_flip_inplace( + &roaring, range_start, uint64_t(range_end) + 1); + } + /** * Remove run-length encoding even when it is more space efficient. * Return whether a change was applied. */ - bool removeRunCompression() { + bool removeRunCompression() noexcept { return api::roaring_bitmap_remove_run_compression(&roaring); } @@ -346,13 +453,13 @@ public: * Returns true if the result has at least one run container. Additional * savings might be possible by calling shrinkToFit(). */ - bool runOptimize() { return api::roaring_bitmap_run_optimize(&roaring); } + bool runOptimize() noexcept { return api::roaring_bitmap_run_optimize(&roaring); } /** * If needed, reallocate memory to shrink the memory usage. Returns * the number of bytes saved. */ - size_t shrinkToFit() { return api::roaring_bitmap_shrink_to_fit(&roaring); } + size_t shrinkToFit() noexcept { return api::roaring_bitmap_shrink_to_fit(&roaring); } /** * Iterate over the bitmap elements. The function iterator is called once @@ -375,21 +482,21 @@ public: * this function returns true and sets element to the element of given rank. * Otherwise, it returns false. */ - bool select(uint32_t rnk, uint32_t *element) const { + bool select(uint32_t rnk, uint32_t *element) const noexcept { return api::roaring_bitmap_select(&roaring, rnk, element); } /** * Computes the size of the intersection between two bitmaps. 
*/ - uint64_t and_cardinality(const Roaring &r) const { + uint64_t and_cardinality(const Roaring &r) const noexcept { return api::roaring_bitmap_and_cardinality(&roaring, &r.roaring); } /** * Check whether the two bitmaps intersect. */ - bool intersect(const Roaring &r) const { + bool intersect(const Roaring &r) const noexcept { return api::roaring_bitmap_intersect(&roaring, &r.roaring); } @@ -400,21 +507,21 @@ public: * * The Jaccard index is undefined if both bitmaps are empty. */ - double jaccard_index(const Roaring &r) const { + double jaccard_index(const Roaring &r) const noexcept { return api::roaring_bitmap_jaccard_index(&roaring, &r.roaring); } /** * Computes the size of the union between two bitmaps. */ - uint64_t or_cardinality(const Roaring &r) const { + uint64_t or_cardinality(const Roaring &r) const noexcept { return api::roaring_bitmap_or_cardinality(&roaring, &r.roaring); } /** * Computes the size of the difference (andnot) between two bitmaps. */ - uint64_t andnot_cardinality(const Roaring &r) const { + uint64_t andnot_cardinality(const Roaring &r) const noexcept { return api::roaring_bitmap_andnot_cardinality(&roaring, &r.roaring); } @@ -422,7 +529,7 @@ public: * Computes the size of the symmetric difference (andnot) between two * bitmaps. */ - uint64_t xor_cardinality(const Roaring &r) const { + uint64_t xor_cardinality(const Roaring &r) const noexcept { return api::roaring_bitmap_xor_cardinality(&roaring, &r.roaring); } @@ -434,10 +541,21 @@ public: * 1 when ranking the smallest value, but the select function returns the * smallest value when using index 0. */ - uint64_t rank(uint32_t x) const { + uint64_t rank(uint32_t x) const noexcept { return api::roaring_bitmap_rank(&roaring, x); } + /** + * Returns the index of x in the set, index start from 0. + * If the set doesn't contain x , this function will return -1. 
+ * The difference with rank function is that this function will return -1 + * when x isn't in the set, but the rank function will return a + * non-negative number. + */ + int64_t getIndex(uint32_t x) const noexcept { + return api::roaring_bitmap_get_index(&roaring, x); + } + /** * Write a bitmap to a char buffer. This is meant to be compatible with * the Java and Go versions. Returns how many bytes were written which @@ -477,11 +595,12 @@ public: * } // namespace serialization * } // namespace boost */ - size_t write(char *buf, bool portable = true) const { - if (portable) + size_t write(char *buf, bool portable = true) const noexcept { + if (portable) { return api::roaring_bitmap_portable_serialize(&roaring, buf); - else + } else { return api::roaring_bitmap_serialize(&roaring, buf); + } } /** @@ -494,6 +613,11 @@ public: * * This function is unsafe in the sense that if you provide bad data, * many, many bytes could be read. See also readSafe. + * + * The function may throw std::runtime_error if a bitmap could not be read. Note that even + * if it does not throw, the bitmap could still be unusable if the loaded + * data does not match the portable Roaring specification: you should + * ensure that the data you load come from a serialized bitmap. */ static Roaring read(const char *buf, bool portable = true) { roaring_bitmap_t * r = portable @@ -508,7 +632,23 @@ public: /** * Read a bitmap from a serialized version, reading no more than maxbytes * bytes. This is meant to be compatible with the Java and Go versions. + * The function itself is safe in the sense that it will not cause buffer overflows. + * However, for correct operations, it is assumed that the bitmap read was once + * serialized from a valid bitmap. If you provided an incorrect input (garbage), then the + * bitmap read may not be in a valid state and following operations may not lead + * to sensible results.
It is your responsibility to ensure that the input bytes + * follow the format specification if you want a usable bitmap: + * https://github.com/RoaringBitmap/RoaringFormatSpec + * In particular, the serialized array containers need to be in sorted order, and the + * run containers should be in sorted non-overlapping order. This is guaranteed to + * happen when serializing an existing bitmap, but not for random inputs. + * Note that this function assumes that your bitmap was serialized in *portable* mode + * (which is the default with the 'write' method). * + * The function may throw std::runtime_error if a bitmap could not be read. Note that even + * if it does not throw, the bitmap could still be unusable if the loaded + * data does not match the portable Roaring specification: you should + * ensure that the data you load come from a serialized bitmap. */ static Roaring readSafe(const char *buf, size_t maxbytes) { roaring_bitmap_t * r = @@ -527,13 +667,18 @@ public: * can save space compared to the portable format (e.g., for very * sparse bitmaps). */ - size_t getSizeInBytes(bool portable = true) const { - if (portable) + size_t getSizeInBytes(bool portable = true) const noexcept { + if (portable) { return api::roaring_bitmap_portable_size_in_bytes(&roaring); - else + } else { return api::roaring_bitmap_size_in_bytes(&roaring); + } } + /** + * For advanced users. + * This function may throw std::runtime_error. + */ static const Roaring frozenView(const char *buf, size_t length) { const roaring_bitmap_t *s = api::roaring_bitmap_frozen_view(buf, length); @@ -545,17 +690,29 @@ public: return r; } - void writeFrozen(char *buf) const { + /** + * For advanced users. + */ + void writeFrozen(char *buf) const noexcept { roaring_bitmap_frozen_serialize(&roaring, buf); } - size_t getFrozenSizeInBytes() const { + /** + * For advanced users.
+ */ + size_t getFrozenSizeInBytes() const noexcept { return roaring_bitmap_frozen_size_in_bytes(&roaring); } /** * Computes the intersection between two bitmaps and returns new bitmap. * The current bitmap and the provided bitmap are unchanged. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmap. + * Consider also using the operator &= to avoid needlessly creating + * many temporary bitmaps. + * This function may throw std::runtime_error. */ Roaring operator&(const Roaring &o) const { roaring_bitmap_t *r = api::roaring_bitmap_and(&roaring, &o.roaring); @@ -568,6 +725,7 @@ public: /** * Computes the difference between two bitmaps and returns new bitmap. * The current bitmap and the provided bitmap are unchanged. + * This function may throw std::runtime_error. */ Roaring operator-(const Roaring &o) const { roaring_bitmap_t *r = api::roaring_bitmap_andnot(&roaring, &o.roaring); @@ -580,6 +738,7 @@ public: /** * Computes the union between two bitmaps and returns new bitmap. * The current bitmap and the provided bitmap are unchanged. + * This function may throw std::runtime_error. */ Roaring operator|(const Roaring &o) const { roaring_bitmap_t *r = api::roaring_bitmap_or(&roaring, &o.roaring); @@ -592,6 +751,7 @@ public: /** * Computes the symmetric union between two bitmaps and returns new bitmap. * The current bitmap and the provided bitmap are unchanged. + * This function may throw std::runtime_error. */ Roaring operator^(const Roaring &o) const { roaring_bitmap_t *r = api::roaring_bitmap_xor(&roaring, &o.roaring); @@ -604,19 +764,19 @@ public: /** * Whether or not we apply copy and write. 
*/ - void setCopyOnWrite(bool val) { + void setCopyOnWrite(bool val) noexcept { api::roaring_bitmap_set_copy_on_write(&roaring, val); } /** * Print the content of the bitmap */ - void printf() const { api::roaring_bitmap_printf(&roaring); } + void printf() const noexcept { api::roaring_bitmap_printf(&roaring); } /** * Print the content of the bitmap into a string */ - std::string toString() const { + std::string toString() const noexcept { struct iter_data { std::string str{}; // The empty constructor silences warnings from pedantic static analyzers. char first_char = '{'; @@ -641,13 +801,14 @@ public: /** * Whether or not copy and write is active. */ - bool getCopyOnWrite() const { + bool getCopyOnWrite() const noexcept { return api::roaring_bitmap_get_copy_on_write(&roaring); } /** * Computes the logical or (union) between "n" bitmaps (referenced by a * pointer). + * This function may throw std::runtime_error. */ static Roaring fastunion(size_t n, const Roaring **inputs) { const roaring_bitmap_t **x = diff --git a/cpp/roaring64map.hh b/cpp/roaring64map.hh index 6ec9ccdff..6caea14a7 100644 --- a/cpp/roaring64map.hh +++ b/cpp/roaring64map.hh @@ -1,27 +1,36 @@ -/* -A C++ header for 64-bit Roaring Bitmaps, implemented by way of a map of many -32-bit Roaring Bitmaps. +/** + * A C++ header for 64-bit Roaring Bitmaps, + * implemented by way of a map of many + * 32-bit Roaring Bitmaps. 
+ * + * Reference (format specification) : + * https://github.com/RoaringBitmap/RoaringFormatSpec#extention-for-64-bit-implementations */ #ifndef INCLUDE_ROARING_64_MAP_HH_ #define INCLUDE_ROARING_64_MAP_HH_ #include +#include // PRIu64 macro #include // for va_list handling in bitmapOf() #include // for std::printf() in the printf() method #include // for std::memcpy() +#include +#include #include #include #include #include +#include #include #include #include #include "roaring.hh" -using roaring::Roaring; namespace roaring { +using roaring::Roaring; + class Roaring64MapSetBitForwardIterator; class Roaring64MapSetBitBiDirectionalIterator; @@ -44,6 +53,13 @@ public: */ Roaring64Map(size_t n, const uint64_t *data) { addMany(n, data); } + /** + * Construct a bitmap from an initializer list. + */ + Roaring64Map(std::initializer_list l) { + addMany(l.size(), l.begin()); + } + /** * Construct a 64-bit map from a 32-bit one */ @@ -75,10 +91,19 @@ public: /** * Move assignment operator. */ - Roaring64Map &operator=(Roaring64Map &&r) noexcept = default; + Roaring64Map &operator=(Roaring64Map &&r) noexcept = default; /** - * Construct a bitmap from a list of integer values. + * Assignment from an initializer list. + */ + Roaring64Map &operator=(std::initializer_list l) { + // Delegate to move assignment operator + *this = Roaring64Map(l); + return *this; + } + + /** + * Construct a bitmap from a list of uint64_t values. */ static Roaring64Map bitmapOf(size_t n...) { Roaring64Map ans; @@ -92,74 +117,328 @@ public: } /** - * Add value x + * Construct a bitmap from a list of uint64_t values. + * E.g., bitmapOfList({1,2,3}). + */ + static Roaring64Map bitmapOfList(std::initializer_list l) { + Roaring64Map ans; + ans.addMany(l.size(), l.begin()); + return ans; + } + + /** + * Adds value x. */ void add(uint32_t x) { - roarings[0].add(x); - roarings[0].setCopyOnWrite(copyOnWrite); + lookupOrCreateInner(0).add(x); } + + /** + * Adds value x. 
+ */ void add(uint64_t x) { - roarings[highBytes(x)].add(lowBytes(x)); - roarings[highBytes(x)].setCopyOnWrite(copyOnWrite); + lookupOrCreateInner(highBytes(x)).add(lowBytes(x)); } /** - * Add value x - * Returns true if a new value was added, false if the value was already existing. + * Adds value x. + * Returns true if a new value was added, false if the value was already + * present. */ bool addChecked(uint32_t x) { - bool result = roarings[0].addChecked(x); - roarings[0].setCopyOnWrite(copyOnWrite); - return result; + return lookupOrCreateInner(0).addChecked(x); } + + /** + * Adds value x. + * Returns true if a new value was added, false if the value was already + * present. + */ bool addChecked(uint64_t x) { - bool result = roarings[highBytes(x)].addChecked(lowBytes(x)); - roarings[highBytes(x)].setCopyOnWrite(copyOnWrite); - return result; + return lookupOrCreateInner(highBytes(x)).addChecked(lowBytes(x)); } /** - * Add value n_args from pointer vals + * Adds all values in the half-open interval [min, max). + */ + void addRange(uint64_t min, uint64_t max) { + if (min >= max) { + return; + } + addRangeClosed(min, max - 1); + } + + /** + * Adds all values in the closed interval [min, max]. + */ + void addRangeClosed(uint32_t min, uint32_t max) { + lookupOrCreateInner(0).addRangeClosed(min, max); + } + + /** + * Adds all values in the closed interval [min, max] + */ + void addRangeClosed(uint64_t min, uint64_t max) { + if (min > max) { + return; + } + uint32_t start_high = highBytes(min); + uint32_t start_low = lowBytes(min); + uint32_t end_high = highBytes(max); + uint32_t end_low = lowBytes(max); + + // We put std::numeric_limits<>::max in parentheses to avoid a + // clash with the Windows.h header under Windows. + const uint32_t uint32_max = (std::numeric_limits::max)(); + + // Fill in any nonexistent slots with empty Roarings. 
This simplifies + // the logic below, allowing it to simply iterate over the map between + // 'start_high' and 'end_high' in a linear fashion. + auto current_iter = ensureRangePopulated(start_high, end_high); + + // If start and end land on the same inner bitmap, then we can do the + // whole operation in one call. + if (start_high == end_high) { + auto &bitmap = current_iter->second; + bitmap.addRangeClosed(start_low, end_low); + return; + } + + // Because start and end don't land on the same inner bitmap, + // we need to do this in multiple steps: + // 1. Partially fill the first bitmap with values from the closed + // interval [start_low, uint32_max] + // 2. Fill intermediate bitmaps completely: [0, uint32_max] + // 3. Partially fill the last bitmap with values from the closed + // interval [0, end_low] + auto num_intermediate_bitmaps = end_high - start_high - 1; + + // Step 1: Partially fill the first bitmap. + { + auto &bitmap = current_iter->second; + bitmap.addRangeClosed(start_low, uint32_max); + ++current_iter; + } + + // Step 2. Fill intermediate bitmaps completely. + if (num_intermediate_bitmaps != 0) { + auto &first_intermediate = current_iter->second; + first_intermediate.addRangeClosed(0, uint32_max); + ++current_iter; + + // Now make (num_intermediate_bitmaps - 1) copies of this. + for (uint32_t i = 1; i != num_intermediate_bitmaps; ++i) { + auto &next_intermediate = current_iter->second; + next_intermediate = first_intermediate; + ++current_iter; + } + } + + // Step 3: Partially fill the last bitmap. + auto &bitmap = current_iter->second; + bitmap.addRangeClosed(0, end_low); + } + + /** + * Adds 'n_args' values from the contiguous memory range starting at 'vals'. 
*/ void addMany(size_t n_args, const uint32_t *vals) { - Roaring &roaring = roarings[0]; - roaring.addMany(n_args, vals); - roaring.setCopyOnWrite(copyOnWrite); + lookupOrCreateInner(0).addMany(n_args, vals); } + /** + * Adds 'n_args' values from the contiguous memory range starting at 'vals'. + */ void addMany(size_t n_args, const uint64_t *vals) { + // Potentially reduce outer map lookups by optimistically + // assuming that adjacent values will belong to the same inner bitmap. + Roaring *last_inner_bitmap = nullptr; + uint32_t last_value_high = 0; for (size_t lcv = 0; lcv < n_args; lcv++) { - roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv])); - roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite); + auto value = vals[lcv]; + auto value_high = highBytes(value); + auto value_low = lowBytes(value); + if (last_inner_bitmap == nullptr || value_high != last_value_high) { + last_inner_bitmap = &lookupOrCreateInner(value_high); + last_value_high = value_high; + } + last_inner_bitmap->add(value_low); } } /** - * Remove value x + * Removes value x. + */ + void remove(uint32_t x) { + auto iter = roarings.begin(); + // Since x is a uint32_t, highbytes(x) == 0. The inner bitmap we are + // looking for, if it exists, will be at the first slot of 'roarings'. + if (iter == roarings.end() || iter->first != 0) { + return; + } + auto &bitmap = iter->second; + bitmap.remove(x); + eraseIfEmpty(iter); + } + + /** + * Removes value x. */ - void remove(uint32_t x) { roarings[0].remove(x); } void remove(uint64_t x) { - auto roaring_iter = roarings.find(highBytes(x)); - if (roaring_iter != roarings.cend()) - roaring_iter->second.remove(lowBytes(x)); + auto iter = roarings.find(highBytes(x)); + if (iter == roarings.end()) { + return; + } + auto &bitmap = iter->second; + bitmap.remove(lowBytes(x)); + eraseIfEmpty(iter); } /** - * Remove value x - * Returns true if a new value was removed, false if the value was not existing. 
+ * Removes value x + * Returns true if a new value was removed, false if the value was not + * present. */ bool removeChecked(uint32_t x) { - return roarings[0].removeChecked(x); + auto iter = roarings.begin(); + // Since x is a uint32_t, highbytes(x) == 0. The inner bitmap we are + // looking for, if it exists, will be at the first slot of 'roarings'. + if (iter == roarings.end() || iter->first != 0) { + return false; + } + auto &bitmap = iter->second; + if (!bitmap.removeChecked(x)) { + return false; + } + eraseIfEmpty(iter); + return true; } + + /** + * Remove value x + * Returns true if a new value was removed, false if the value was not + * present. + */ bool removeChecked(uint64_t x) { - auto roaring_iter = roarings.find(highBytes(x)); - if (roaring_iter != roarings.cend()) - return roaring_iter->second.removeChecked(lowBytes(x)); - return false; + auto iter = roarings.find(highBytes(x)); + if (iter == roarings.end()) { + return false; + } + auto &bitmap = iter->second; + if (!bitmap.removeChecked(lowBytes(x))) { + return false; + } + eraseIfEmpty(iter); + return true; + } + + /** + * Removes all values in the half-open interval [min, max). + */ + void removeRange(uint64_t min, uint64_t max) { + if (min >= max) { + return; + } + return removeRangeClosed(min, max - 1); } /** - * Clear the bitmap + * Removes all values in the closed interval [min, max]. + */ + void removeRangeClosed(uint32_t min, uint32_t max) { + auto iter = roarings.begin(); + // Since min and max are uint32_t, highbytes(min or max) == 0. The inner + // bitmap we are looking for, if it exists, will be at the first slot of + // 'roarings'. + if (iter == roarings.end() || iter->first != 0) { + return; + } + auto &bitmap = iter->second; + bitmap.removeRangeClosed(min, max); + eraseIfEmpty(iter); + } + + /** + * Removes all values in the closed interval [min, max]. 
+ */ + void removeRangeClosed(uint64_t min, uint64_t max) { + if (min > max) { + return; + } + uint32_t start_high = highBytes(min); + uint32_t start_low = lowBytes(min); + uint32_t end_high = highBytes(max); + uint32_t end_low = lowBytes(max); + + // We put std::numeric_limits<>::max in parentheses to avoid a + // clash with the Windows.h header under Windows. + const uint32_t uint32_max = (std::numeric_limits::max)(); + + // If the outer map is empty, end_high is less than the first key, + // or start_high is greater than the last key, then exit now because + // there is no work to do. + if (roarings.empty() || end_high < roarings.cbegin()->first || + start_high > (roarings.crbegin())->first) { + return; + } + + // If we get here, start_iter points to the first entry in the outer map + // with key >= start_high. Such an entry is known to exist (i.e. the + // iterator will not be equal to end()) because start_high <= the last + // key in the map (thanks to the above if statement). + auto start_iter = roarings.lower_bound(start_high); + // end_iter points to the first entry in the outer map with + // key >= end_high, if such a key exists. Otherwise, it equals end(). + auto end_iter = roarings.lower_bound(end_high); + + // Note that the 'lower_bound' method will find the start and end slots, + // if they exist; otherwise it will find the next-higher slots. + // In the case where 'start' landed on an existing slot, we need to do a + // partial erase of that slot, and likewise for 'end'. But all the slots + // in between can be fully erased. More precisely: + // + // 1. If the start point falls on an existing entry, there are two + // subcases: + // a. if the end point falls on that same entry, remove the closed + // interval [start_low, end_low] from that entry and we are done. + // b. Otherwise, remove the closed interval [start_low, uint32_max] + // from that entry, advance start_iter, and fall through to step 2. + // 2. 
Completely erase all slots in the half-open interval + // [start_iter, end_iter) + // 3. If the end point falls on an existing entry, remove the closed + // interval [0, end_low] from it. + + // Step 1. If the start point falls on an existing entry... + if (start_iter->first == start_high) { + auto &start_inner = start_iter->second; + // 1a. if the end point falls on that same entry... + if (start_iter == end_iter) { + start_inner.removeRangeClosed(start_low, end_low); + eraseIfEmpty(start_iter); + return; + } + + // 1b. Otherwise, remove the closed range [start_low, uint32_max]... + start_inner.removeRangeClosed(start_low, uint32_max); + // Advance start_iter, but keep the old value so we can check the + // bitmap we just modified for emptiness and erase it if necessary. + auto temp = start_iter++; + eraseIfEmpty(temp); + } + + // 2. Completely erase all slots in the half-open interval... + roarings.erase(start_iter, end_iter); + + // 3. If the end point falls on an existing entry... + if (end_iter != roarings.end() && end_iter->first == end_high) { + auto &end_inner = end_iter->second; + end_inner.removeRangeClosed(0, end_low); + eraseIfEmpty(end_iter); + } + } + + /** + * Clears the bitmap. */ void clear() { roarings.clear(); @@ -210,16 +489,61 @@ public: } /** - * Compute the intersection between the current bitmap and the provided - * bitmap, writing the result in the current bitmap. The provided bitmap - * is not modified. + * Compute the intersection of the current bitmap and the provided bitmap, + * writing the result in the current bitmap. The provided bitmap is not + * modified. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmap.
*/ - Roaring64Map &operator&=(const Roaring64Map &r) { - for (auto &map_entry : roarings) { - if (r.roarings.count(map_entry.first) == 1) - map_entry.second &= r.roarings.at(map_entry.first); - else - map_entry.second = Roaring(); + Roaring64Map &operator&=(const Roaring64Map &other) { + if (this == &other) { + // ANDing *this with itself is a no-op. + return *this; + } + + // Logic table summarizing what to do when a given outer key is + // present vs. absent from self and other. + // + // self other (self & other) work to do + // -------------------------------------------- + // absent absent empty None + // absent present empty None + // present absent empty Erase self + // present present empty or not Intersect self with other, but + // erase self if result is empty. + // + // Because there is only work to do when a key is present in 'self', the + // main for loop iterates over entries in 'self'. + + decltype(roarings.begin()) self_next; + for (auto self_iter = roarings.begin(); self_iter != roarings.end(); + self_iter = self_next) { + // Do the 'next' operation now, so we don't have to worry about + // invalidation of self_iter down below with the 'erase' operation. + self_next = std::next(self_iter); + + auto self_key = self_iter->first; + auto &self_bitmap = self_iter->second; + + auto other_iter = other.roarings.find(self_key); + if (other_iter == other.roarings.end()) { + // 'other' doesn't have self_key. In the logic table above, + // this reflects the case (self.present & other.absent). + // So, erase self. + roarings.erase(self_iter); + continue; + } + + // Both sides have self_key. In the logic table above, this reflects + // the case (self.present & other.present). So, intersect self with + // other. + const auto &other_bitmap = other_iter->second; + self_bitmap &= other_bitmap; + if (self_bitmap.isEmpty()) { + // ...but if intersection is empty, remove it altogether. 
+ roarings.erase(self_iter); + } } return *this; } @@ -229,44 +553,177 @@ public: * bitmap, writing the result in the current bitmap. The provided bitmap * is not modified. */ - Roaring64Map &operator-=(const Roaring64Map &r) { - for (auto &map_entry : roarings) { - if (r.roarings.count(map_entry.first) == 1) - map_entry.second -= r.roarings.at(map_entry.first); + Roaring64Map &operator-=(const Roaring64Map &other) { + if (this == &other) { + // Subtracting *this from itself results in the empty map. + roarings.clear(); + return *this; + } + + // Logic table summarizing what to do when a given outer key is + // present vs. absent from self and other. + // + // self other (self - other) work to do + // -------------------------------------------- + // absent absent empty None + // absent present empty None + // present absent unchanged None + // present present empty or not Subtract other from self, but + // erase self if result is empty + // + // Because there is only work to do when a key is present in both 'self' + // and 'other', the main while loop ping-pongs back and forth until it + // finds the next key that is the same on both sides. + + auto self_iter = roarings.begin(); + auto other_iter = other.roarings.cbegin(); + + while (self_iter != roarings.end() && + other_iter != other.roarings.cend()) { + auto self_key = self_iter->first; + auto other_key = other_iter->first; + if (self_key < other_key) { + // Because self_key is < other_key, advance self_iter to the + // first point where self_key >= other_key (or end). + self_iter = roarings.lower_bound(other_key); + continue; + } + + if (self_key > other_key) { + // Because self_key is > other_key, advance other_iter to the + // first point where other_key >= self_key (or end). + other_iter = other.roarings.lower_bound(self_key); + continue; + } + + // Both sides have self_key. In the logic table above, this reflects + // the case (self.present & other.present). So subtract other from + // self. 
+ auto &self_bitmap = self_iter->second; + const auto &other_bitmap = other_iter->second; + self_bitmap -= other_bitmap; + + if (self_bitmap.isEmpty()) { + // ...but if subtraction is empty, remove it altogether. + self_iter = roarings.erase(self_iter); + } else { + ++self_iter; + } + ++other_iter; } return *this; } /** - * Compute the union between the current bitmap and the provided bitmap, + * Compute the union of the current bitmap and the provided bitmap, * writing the result in the current bitmap. The provided bitmap is not * modified. * * See also the fastunion function to aggregate many bitmaps more quickly. */ - Roaring64Map &operator|=(const Roaring64Map &r) { - for (const auto &map_entry : r.roarings) { - if (roarings.count(map_entry.first) == 0) { - roarings[map_entry.first] = map_entry.second; - roarings[map_entry.first].setCopyOnWrite(copyOnWrite); - } else - roarings[map_entry.first] |= map_entry.second; + Roaring64Map &operator|=(const Roaring64Map &other) { + if (this == &other) { + // ORing *this with itself is a no-op. + return *this; + } + + // Logic table summarizing what to do when a given outer key is + // present vs. absent from self and other. + // + // self other (self | other) work to do + // -------------------------------------------- + // absent absent empty None + // absent present not empty Copy other to self and set flags + // present absent unchanged None + // present present not empty self |= other + // + // Because there is only work to do when a key is present in 'other', + // the main for loop iterates over entries in 'other'. + + for (const auto &other_entry : other.roarings) { + const auto &other_bitmap = other_entry.second; + + // Try to insert other_bitmap into self at other_key. We take + // advantage of the fact that std::map::insert will not overwrite an + // existing entry. 
+ auto insert_result = roarings.insert(other_entry); + auto self_iter = insert_result.first; + auto insert_happened = insert_result.second; + auto &self_bitmap = self_iter->second; + + if (insert_happened) { + // Key was not present in self, so insert was performed above. + // In the logic table above, this reflects the case + // (self.absent | other.present). Because the copy has already + // happened, thanks to the 'insert' operation above, we just + // need to set the copyOnWrite flag. + self_bitmap.setCopyOnWrite(copyOnWrite); + continue; + } + + // Both sides have self_key, and the insert was not performed. In + // the logic table above, this reflects the case + // (self.present & other.present). So OR other into self. + self_bitmap |= other_bitmap; } return *this; } /** - * Compute the symmetric union between the current bitmap and the provided - * bitmap, writing the result in the current bitmap. The provided bitmap - * is not modified. + * Compute the XOR of the current bitmap and the provided bitmap, writing + * the result in the current bitmap. The provided bitmap is not modified. */ - Roaring64Map &operator^=(const Roaring64Map &r) { - for (const auto &map_entry : r.roarings) { - if (roarings.count(map_entry.first) == 0) { - roarings[map_entry.first] = map_entry.second; - roarings[map_entry.first].setCopyOnWrite(copyOnWrite); - } else - roarings[map_entry.first] ^= map_entry.second; + Roaring64Map &operator^=(const Roaring64Map &other) { + if (this == &other) { + // XORing *this with itself results in the empty map. + roarings.clear(); + return *this; + } + + // Logic table summarizing what to do when a given outer key is + // present vs. absent from self and other. 
+ // + // self other (self ^ other) work to do + // -------------------------------------------- + // absent absent empty None + // absent present non-empty Copy other to self and set flags + // present absent unchanged None + // present present empty or not XOR other into self, but erase self + // if result is empty. + // + // Because there is only work to do when a key is present in 'other', + // the main for loop iterates over entries in 'other'. + + for (const auto &other_entry : other.roarings) { + const auto &other_bitmap = other_entry.second; + + // Try to insert other_bitmap into self at other_key. We take + // advantage of the fact that std::map::insert will not overwrite an + // existing entry. + auto insert_result = roarings.insert(other_entry); + auto self_iter = insert_result.first; + auto insert_happened = insert_result.second; + auto &self_bitmap = self_iter->second; + + if (insert_happened) { + // Key was not present in self, so insert was performed above. + // In the logic table above, this reflects the case + // (self.absent ^ other.present). Because the copy has already + // happened, thanks to the 'insert' operation above, we just + // need to set the copyOnWrite flag. + self_bitmap.setCopyOnWrite(copyOnWrite); + continue; + } + + // Both sides have self_key, and the insert was not performed. In + // the logic table above, this reflects the case + // (self.present ^ other.present). So XOR other into self. + self_bitmap ^= other_bitmap; + + if (self_bitmap.isEmpty()) { + // ...but if the XOR result is empty, remove it altogether. 
+ roarings.erase(self_iter); + } } return *this; } @@ -338,6 +795,9 @@ public: */ bool isSubset(const Roaring64Map &r) const { for (const auto &map_entry : roarings) { + if (map_entry.second.isEmpty()) { + continue; + } auto roaring_iter = r.roarings.find(map_entry.first); if (roaring_iter == r.roarings.cend()) return false; @@ -420,36 +880,98 @@ public: } /** - * Compute the negation of the roaring bitmap within a specified interval. - * areas outside the range are passed through unchanged. + * Computes the negation of the roaring bitmap within the half-open interval + * [min, max). Areas outside the interval are unchanged. + */ + void flip(uint64_t min, uint64_t max) { + if (min >= max) { + return; + } + flipClosed(min, max - 1); + } + + /** + * Computes the negation of the roaring bitmap within the closed interval + * [min, max]. Areas outside the interval are unchanged. */ - void flip(uint64_t range_start, uint64_t range_end) { - uint32_t start_high = highBytes(range_start); - uint32_t start_low = lowBytes(range_start); - uint32_t end_high = highBytes(range_end); - uint32_t end_low = lowBytes(range_end); + void flipClosed(uint32_t min, uint32_t max) { + auto iter = roarings.begin(); + // Since min and max are uint32_t, highbytes(min or max) == 0. The inner + // bitmap we are looking for, if it exists, will be at the first slot of + // 'roarings'. If it does not exist, we have to create it. + if (iter == roarings.end() || iter->first != 0) { + iter = roarings.emplace_hint(iter, std::piecewise_construct, + std::forward_as_tuple(0), + std::forward_as_tuple()); + auto &bitmap = iter->second; + bitmap.setCopyOnWrite(copyOnWrite); + } + auto &bitmap = iter->second; + bitmap.flipClosed(min, max); + eraseIfEmpty(iter); + } + /** + * Computes the negation of the roaring bitmap within the closed interval + * [min, max]. Areas outside the interval are unchanged. 
+ */ + void flipClosed(uint64_t min, uint64_t max) { + if (min > max) { + return; + } + uint32_t start_high = highBytes(min); + uint32_t start_low = lowBytes(min); + uint32_t end_high = highBytes(max); + uint32_t end_low = lowBytes(max); + + // We put std::numeric_limits<>::max in parentheses to avoid a + // clash with the Windows.h header under Windows. + const uint32_t uint32_max = (std::numeric_limits::max)(); + + // Fill in any nonexistent slots with empty Roarings. This simplifies + // the logic below, allowing it to simply iterate over the map between + // 'start_high' and 'end_high' in a linear fashion. + auto current_iter = ensureRangePopulated(start_high, end_high); + + // If start and end land on the same inner bitmap, then we can do the + // whole operation in one call. if (start_high == end_high) { - roarings[start_high].flip(start_low, end_low); + auto &bitmap = current_iter->second; + bitmap.flipClosed(start_low, end_low); + eraseIfEmpty(current_iter); return; } - // we put std::numeric_limits<>::max/min in parentheses - // to avoid a clash with the Windows.h header under Windows - // flip operates on the range [lower_bound, upper_bound) - const uint64_t max_upper_bound = - static_cast((std::numeric_limits::max)()) + 1; - roarings[start_high].flip(start_low, max_upper_bound); - roarings[start_high++].setCopyOnWrite(copyOnWrite); - for (; start_high <= highBytes(range_end) - 1; ++start_high) { - roarings[start_high].flip((std::numeric_limits::min)(), - max_upper_bound); - roarings[start_high].setCopyOnWrite(copyOnWrite); + // Because start and end don't land on the same inner bitmap, + // we need to do this in multiple steps: + // 1. Partially flip the first bitmap in the closed interval + // [start_low, uint32_max] + // 2. Flip intermediate bitmaps completely: [0, uint32_max] + // 3. Partially flip the last bitmap in the closed interval + // [0, end_low] + + auto num_intermediate_bitmaps = end_high - start_high - 1; + + // 1. 
Partially flip the first bitmap. + { + auto &bitmap = current_iter->second; + bitmap.flipClosed(start_low, uint32_max); + auto temp = current_iter++; + eraseIfEmpty(temp); + } + + // 2. Flip intermediate bitmaps completely. + for (uint32_t i = 0; i != num_intermediate_bitmaps; ++i) { + auto &bitmap = current_iter->second; + bitmap.flipClosed(0, uint32_max); + auto temp = current_iter++; + eraseIfEmpty(temp); } - roarings[start_high].flip((std::numeric_limits::min)(), - end_low); - roarings[start_high].setCopyOnWrite(copyOnWrite); + // 3. Partially flip the last bitmap. + auto &bitmap = current_iter->second; + bitmap.flipClosed(0, end_low); + eraseIfEmpty(current_iter); } /** @@ -521,20 +1043,29 @@ public: } /** - * If the size of the roaring bitmap is strictly greater than rank, then - * this function returns true and set element to the element of given - * rank. Otherwise, it returns false. + * Selects the value at index 'rank' in the bitmap, where the smallest value + * is at index 0. If 'rank' < cardinality(), returns true with *element set + * to the element of the specified rank. Otherwise, returns false and the + * contents of *element are unspecified. */ - bool select(uint64_t rnk, uint64_t *element) const { + bool select(uint64_t rank, uint64_t *element) const { for (const auto &map_entry : roarings) { - uint64_t sub_cardinality = (uint64_t)map_entry.second.cardinality(); - if (rnk < sub_cardinality) { - *element = ((uint64_t)map_entry.first) << 32; - // assuming little endian - return map_entry.second.select((uint32_t)rnk, - ((uint32_t *)element)); + auto key = map_entry.first; + const auto &bitmap = map_entry.second; + + uint64_t sub_cardinality = bitmap.cardinality(); + if (rank < sub_cardinality) { + uint32_t low_bytes; + // Casting rank to uint32_t is safe because + // rank < sub_cardinality and sub_cardinality <= 2^32. 
+ if (!bitmap.select((uint32_t)rank, &low_bytes)) { + ROARING_TERMINATE("Logic error: bitmap.select() " + "returned false despite rank < cardinality()"); + } + *element = uniteBytes(key, low_bytes); + return true; } - rnk -= sub_cardinality; + rank -= sub_cardinality; } return false; } @@ -544,21 +1075,40 @@ public: */ uint64_t rank(uint64_t x) const { uint64_t result = 0; + // Find the first bitmap >= x's bucket. If that is the bucket x would be in, find x's rank in that bucket. + // Either way, we're left with the range of all buckets strictly smaller than x's bucket; add all their + // cardinalities together. + auto end = roarings.lower_bound(highBytes(x)); + if (end != roarings.cend() && end->first == highBytes(x)) { + result += end->second.rank(lowBytes(x)); + } + for (auto iter = roarings.cbegin(); iter != end; ++iter) { + result += iter->second.cardinality(); + } + return result; + } + + /** + * Returns the index of x in the set; indexes start from 0. + * If the set doesn't contain x, this function will return -1. + * The difference from the rank function is that this function will return -1 + * when x isn't in the set, but the rank function will return a + * non-negative number. 
+ */ + int64_t getIndex(uint64_t x) const { + int64_t index = 0; auto roaring_destination = roarings.find(highBytes(x)); if (roaring_destination != roarings.cend()) { for (auto roaring_iter = roarings.cbegin(); roaring_iter != roaring_destination; ++roaring_iter) { - result += roaring_iter->second.cardinality(); + index += roaring_iter->second.cardinality(); } - result += roaring_destination->second.rank(lowBytes(x)); - return result; + auto low_idx = roaring_destination->second.getIndex(lowBytes(x)); + if (low_idx < 0) return -1; + index += low_idx; + return index; } - roaring_destination = roarings.lower_bound(highBytes(x)); - for (auto roaring_iter = roarings.cbegin(); - roaring_iter != roaring_destination; ++roaring_iter) { - result += roaring_iter->second.cardinality(); - } - return result; + return -1; } /** @@ -632,19 +1182,17 @@ public: * space compared to the portable format (e.g., for very sparse bitmaps). */ static Roaring64Map readSafe(const char *buf, size_t maxbytes) { + if (maxbytes < sizeof(uint64_t)) { + ROARING_TERMINATE("ran out of bytes"); + } Roaring64Map result; - // get map size uint64_t map_size; std::memcpy(&map_size, buf, sizeof(uint64_t)); buf += sizeof(uint64_t); + maxbytes -= sizeof(uint64_t); for (uint64_t lcv = 0; lcv < map_size; lcv++) { - // get map key if(maxbytes < sizeof(uint32_t)) { -#if ROARING_EXCEPTIONS - throw std::runtime_error("ran out of bytes"); -#else ROARING_TERMINATE("ran out of bytes"); -#endif } uint32_t key; std::memcpy(&key, buf, sizeof(uint32_t)); @@ -778,6 +1326,11 @@ public: /** * Computes the intersection between two bitmaps and returns new bitmap. * The current bitmap and the provided bitmap are unchanged. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmap. + * Consider also using the operator &= to avoid needlessly creating + * many temporary bitmaps. 
*/ Roaring64Map operator&(const Roaring64Map &o) const { return Roaring64Map(*this) &= o; @@ -820,90 +1373,27 @@ public: } /** - * Print the content of the bitmap + * Print the contents of the bitmap to stdout. + * Note: this method adds a final newline, but toString() does not. */ void printf() const { - if (!isEmpty()) { - auto map_iter = roarings.cbegin(); - while (map_iter->second.isEmpty()) ++map_iter; - struct iter_data { - uint32_t high_bits{}; - char first_char{'{'}; - } outer_iter_data; - outer_iter_data.high_bits = roarings.begin()->first; - map_iter->second.iterate( - [](uint32_t low_bits, void *inner_iter_data) -> bool { - std::printf("%c%llu", - ((iter_data *)inner_iter_data)->first_char, - (long long unsigned)uniteBytes( - ((iter_data *)inner_iter_data)->high_bits, - low_bits)); - ((iter_data *)inner_iter_data)->first_char = ','; - return true; - }, - (void *)&outer_iter_data); - std::for_each( - ++map_iter, roarings.cend(), - [](const std::pair &map_entry) { - map_entry.second.iterate( - [](uint32_t low_bits, void *high_bits) -> bool { - std::printf(",%llu", - (long long unsigned)uniteBytes( - *(uint32_t *)high_bits, low_bits)); - return true; - }, - (void *)&map_entry.first); - }); - } else - std::printf("{"); - std::printf("}\n"); - } - - /** - * Print the content of the bitmap into a string + auto sink = [](const std::string &s) { + fputs(s.c_str(), stdout); + }; + printToSink(sink); + sink("\n"); + } + + /** + * Print the contents of the bitmap into a string. */ std::string toString() const { - struct iter_data { - std::string str{}; // The empty constructor silences warnings from pedantic static analyzers. 
- uint32_t high_bits{0}; - char first_char{'{'}; - } outer_iter_data; - if (!isEmpty()) { - auto map_iter = roarings.cbegin(); - while (map_iter->second.isEmpty()) ++map_iter; - outer_iter_data.high_bits = roarings.begin()->first; - map_iter->second.iterate( - [](uint32_t low_bits, void *inner_iter_data) -> bool { - ((iter_data *)inner_iter_data)->str += - ((iter_data *)inner_iter_data)->first_char; - ((iter_data *)inner_iter_data)->str += std::to_string( - uniteBytes(((iter_data *)inner_iter_data)->high_bits, - low_bits)); - ((iter_data *)inner_iter_data)->first_char = ','; - return true; - }, - (void *)&outer_iter_data); - std::for_each( - ++map_iter, roarings.cend(), - [&outer_iter_data]( - const std::pair &map_entry) { - outer_iter_data.high_bits = map_entry.first; - map_entry.second.iterate( - [](uint32_t low_bits, void *inner_iter_data) -> bool { - ((iter_data *)inner_iter_data)->str += - ((iter_data *)inner_iter_data)->first_char; - ((iter_data *)inner_iter_data)->str += - std::to_string(uniteBytes( - ((iter_data *)inner_iter_data)->high_bits, - low_bits)); - return true; - }, - (void *)&outer_iter_data); - }); - } else - outer_iter_data.str = '{'; - outer_iter_data.str += '}'; - return outer_iter_data.str; + std::string result; + auto sink = [&result](const std::string &s) { + result += s; + }; + printToSink(sink); + return result; } /** @@ -916,12 +1406,124 @@ public: * pointer). */ static Roaring64Map fastunion(size_t n, const Roaring64Map **inputs) { - Roaring64Map ans; - // not particularly fast - for (size_t lcv = 0; lcv < n; ++lcv) { - ans |= *(inputs[lcv]); + // The strategy here is to basically do a "group by" operation. + // We group the input roarings by key, do a 32-bit + // roaring_bitmap_or_many on each group, and collect the results. + // We accomplish the "group by" operation using a priority queue, which + // tracks the next key for each of our input maps. 
At each step, our + // algorithm takes the next subset of maps that share the same next key, + // runs roaring_bitmap_or_many on those bitmaps, and then advances the + // current_iter on all the affected entries and then repeats. + + // There is an entry in our priority queue for each of the 'n' inputs. + // For a given Roaring64Map, we look at its underlying 'roarings' + // std::map, and take its begin() and end(). This forms our half-open + // interval [current_iter, end_iter), which we keep in the priority + // queue as a pq_entry. These entries are updated (removed and then + // reinserted with the pq_entry.iterator field advanced by one step) as + // our algorithm progresses. But when a given interval becomes empty + // (i.e. pq_entry.iterator == pq_entry.end) it is not returned to the + // priority queue. + struct pq_entry { + roarings_t::const_iterator iterator; + roarings_t::const_iterator end; + }; + + // Custom comparator for the priority queue. + auto pq_comp = [](const pq_entry &lhs, const pq_entry &rhs) { + auto left_key = lhs.iterator->first; + auto right_key = rhs.iterator->first; + + // We compare in the opposite direction than normal because priority + // queues normally order from largest to smallest, but we want + // smallest to largest. + return left_key > right_key; + }; + + // Create and populate the priority queue. + std::priority_queue, decltype(pq_comp)> pq(pq_comp); + for (size_t i = 0; i < n; ++i) { + const auto &roarings = inputs[i]->roarings; + if (roarings.begin() != roarings.end()) { + pq.push({roarings.begin(), roarings.end()}); + } } - return ans; + + // A reusable vector that holds the pointers to the inner bitmaps that + // we pass to the underlying 32-bit fastunion operation. + std::vector group_bitmaps; + + // Summary of the algorithm: + // 1. While the priority queue is not empty: + // A. Get its lowest key. Call this group_key + // B. While the lowest entry in the priority queue has a key equal to + // group_key: + // 1. 
Remove this entry (the pair {current_iter, end_iter}) from + // the priority queue. + // 2. Add the bitmap pointed to by current_iter to a list of + // 32-bit bitmaps to process. + // 3. Advance current_iter. Now it will point to a bitmap entry + // with some key greater than group_key (or it will point to + // end()). + // 4. If current_iter != end_iter, reinsert the pair into the + // priority queue. + // C. Invoke the 32-bit roaring_bitmap_or_many() and add to result + Roaring64Map result; + while (!pq.empty()) { + // Find the next key (the lowest key) in the priority queue. + auto group_key = pq.top().iterator->first; + + // The purpose of the inner loop is to gather all the inner bitmaps + // that share "group_key" into "group_bitmaps" so that they can be + // fed to roaring_bitmap_or_many(). While we are doing this, we + // advance those iterators to their next value and reinsert them + // into the priority queue (unless they reach their end). + group_bitmaps.clear(); + while (!pq.empty()) { + auto candidate_current_iter = pq.top().iterator; + auto candidate_end_iter = pq.top().end; + + auto candidate_key = candidate_current_iter->first; + const auto &candidate_bitmap = candidate_current_iter->second; + + // This element will either be in the group (having + // key == group_key) or it will not be in the group (having + // key > group_key). (Note it cannot have key < group_key + // because of the ordered nature of the priority queue itself + // and the ordered nature of all the underlying roaring maps). + if (candidate_key != group_key) { + // This entry, and (thanks to the nature of the priority + // queue) all other entries as well, are all greater than + // group_key, so we're done collecting elements for the + // current group. Because of the way this loop was written, + // the group will always contain at least one element. + break; + } + + group_bitmaps.push_back(&candidate_bitmap.roaring); + // Remove this entry from the priority queue. 
Note this + // invalidates pq.top() so make sure you don't have any dangling + // references to it. + pq.pop(); + + // Advance 'candidate_current_iter' and insert a new entry + // {candidate_current_iter, candidate_end_iter} into the + // priority queue (unless it has reached its end). + ++candidate_current_iter; + if (candidate_current_iter != candidate_end_iter) { + pq.push({candidate_current_iter, candidate_end_iter}); + } + } + + // Use the fast inner union to combine these. + auto *inner_result = roaring_bitmap_or_many(group_bitmaps.size(), + group_bitmaps.data()); + // Insert the 32-bit result at end of the 'roarings' map of the + // result we are building. + result.roarings.insert(result.roarings.end(), + std::make_pair(group_key, Roaring(inner_result))); + } + return result; } friend class Roaring64MapSetBitForwardIterator; @@ -947,12 +1549,13 @@ public: const_iterator end() const; private: - std::map roarings{}; // The empty constructor silences warnings from pedantic static analyzers. + typedef std::map roarings_t; + roarings_t roarings{}; // The empty constructor silences warnings from pedantic static analyzers. bool copyOnWrite{false}; - static uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); } - static uint32_t lowBytes(const uint64_t in) { return uint32_t(in); } - static uint64_t uniteBytes(const uint32_t highBytes, - const uint32_t lowBytes) { + static constexpr uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); } + static constexpr uint32_t lowBytes(const uint64_t in) { return uint32_t(in); } + static constexpr uint64_t uniteBytes(const uint32_t highBytes, + const uint32_t lowBytes) { return (uint64_t(highBytes) << 32) | uint64_t(lowBytes); } // this is needed to tolerate gcc's C++11 libstdc++ lacking emplace @@ -972,6 +1575,102 @@ private: roarings.emplace(key, std::move(value)); #endif } + + /* + * Look up 'key' in the 'roarings' map. If it does not exist, create it. 
+ * Also, set its copyOnWrite flag to 'copyOnWrite'. Then return a reference + * to the (already existing or newly created) inner bitmap. + */ + Roaring &lookupOrCreateInner(uint32_t key) { + auto &bitmap = roarings[key]; + bitmap.setCopyOnWrite(copyOnWrite); + return bitmap; + } + + /** + * Prints the contents of the bitmap to a caller-provided sink function. + */ + void printToSink(const std::function &sink) const { + sink("{"); + + // Storage for snprintf. Big enough to store the decimal representation + // of the largest uint64_t value and trailing \0. + char buffer[32]; + const char *separator = ""; + // Reusable, and therefore avoids many repeated heap allocations. + std::string callback_string; + for (const auto &entry : roarings) { + auto high_bits = entry.first; + const auto &bitmap = entry.second; + for (const auto low_bits : bitmap) { + auto value = uniteBytes(high_bits, low_bits); + snprintf(buffer, sizeof(buffer), "%" PRIu64, value); + callback_string = separator; + callback_string.append(buffer); + sink(callback_string); + separator = ","; + } + } + sink("}"); + } + + /** + * Ensures that every key in the closed interval [start_high, end_high] + * refers to a Roaring bitmap rather than being an empty slot. Inserts empty + * Roaring bitmaps if necessary. The interval must be valid and non-empty. + * Returns an iterator to the bitmap at start_high. + */ + roarings_t::iterator ensureRangePopulated(uint32_t start_high, + uint32_t end_high) { + if (start_high > end_high) { + ROARING_TERMINATE("Logic error: start_high > end_high"); + } + // next_populated_iter points to the first entry in the outer map with + // key >= start_high, or end(). + auto next_populated_iter = roarings.lower_bound(start_high); + + // Use uint64_t to avoid an infinite loop when end_high == uint32_max. + roarings_t::iterator start_iter{}; // Definitely assigned in loop. 
+ for (uint64_t slot = start_high; slot <= end_high; ++slot) { + roarings_t::iterator slot_iter; + if (next_populated_iter != roarings.end() && + next_populated_iter->first == slot) { + // 'slot' index has caught up to next_populated_iter. + // Note it here and advance next_populated_iter. + slot_iter = next_populated_iter++; + } else { + // 'slot' index has not yet caught up to next_populated_iter. + // Make a fresh entry {key = 'slot', value = Roaring()}, insert + // it just prior to next_populated_iter, and set its copy + // on write flag. We take pains to use emplace_hint and + // piecewise_construct to minimize effort. + slot_iter = roarings.emplace_hint( + next_populated_iter, std::piecewise_construct, + std::forward_as_tuple(uint32_t(slot)), + std::forward_as_tuple()); + auto &bitmap = slot_iter->second; + bitmap.setCopyOnWrite(copyOnWrite); + } + + // Make a note of the iterator of the starting slot. It will be + // needed for the return value. + if (slot == start_high) { + start_iter = slot_iter; + } + } + return start_iter; + } + + /** + * Erases the entry pointed to by 'iter' from the 'roarings' map if its + * bitmap is empty. Warning: erasing the entry invalidates 'iter'. + */ + void eraseIfEmpty(roarings_t::iterator iter) { + const auto &bitmap = iter->second; + if (bitmap.isEmpty()) { + roarings.erase(iter); + } + } }; /** @@ -981,7 +1680,7 @@ class Roaring64MapSetBitForwardIterator { public: typedef std::forward_iterator_tag iterator_category; typedef uint64_t *pointer; - typedef uint64_t &reference_type; + typedef uint64_t &reference; typedef uint64_t value_type; typedef int64_t difference_type; typedef Roaring64MapSetBitForwardIterator type_of_iterator; diff --git a/doxygen b/doxygen new file mode 100644 index 000000000..571e70754 --- /dev/null +++ b/doxygen @@ -0,0 +1,2741 @@ +# Doxyfile 1.9.6 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. 
+# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \"). +# +# Note: +# +# Use doxygen to compare the used configuration file with the template +# configuration file: +# doxygen -x [configFile] +# Use doxygen to compare the used configuration file with the template +# configuration file without replacing the environment variables or CMake type +# replacement variables: +# doxygen -x_noenv [configFile] + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the configuration +# file that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# https://www.gnu.org/software/libiconv/ for the list of possible encodings. +# The default value is: UTF-8. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. + +PROJECT_NAME = "CRoaring" + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. 
+ +PROJECT_NUMBER = "2.0.1" + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer a +# quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = "Roaring bitmaps in C (and C++)" + +# With the PROJECT_LOGO tag one can specify a logo or an icon that is included +# in the documentation. The maximum height of the logo should not exceed 55 +# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy +# the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. + +OUTPUT_DIRECTORY = "docs" + +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create up to 4096 +# sub-directories (in 2 levels) under the output directory of each output format +# and will distribute the generated files over these directories. Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise causes +# performance problems for the file system. Adapt CREATE_SUBDIRS_LEVEL to +# control the number of sub-directories. +# The default value is: NO. + +CREATE_SUBDIRS = YES + +# Controls the number of sub-directories that will be created when +# CREATE_SUBDIRS tag is set to YES. Level 0 represents 16 directories, and every +# level increment doubles the number of directories, resulting in 4096 +# directories at level 8 which is the default and also the maximum value. The +# sub-directories are organized in 2 levels, the first level always has a fixed +# number of 16 directories. +# Minimum value: 0, maximum value: 8, default value: 8. 
+# This tag requires that the tag CREATE_SUBDIRS is set to YES. + +CREATE_SUBDIRS_LEVEL = 8 + +# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII +# characters to appear in the names of generated files. If set to NO, non-ASCII +# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode +# U+3044. +# The default value is: NO. + +ALLOW_UNICODE_NAMES = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Bulgarian, +# Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, English +# (United States), Esperanto, Farsi (Persian), Finnish, French, German, Greek, +# Hindi, Hungarian, Indonesian, Italian, Japanese, Japanese-en (Japanese with +# English messages), Korean, Korean-en (Korean with English messages), Latvian, +# Lithuanian, Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, +# Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, +# Swedish, Turkish, Ukrainian and Vietnamese. +# The default value is: English. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief +# description of a member or function before the detailed description +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +# The default value is: YES. 
+ +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is. If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity):The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. + +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path +# before files name in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used +# The default value is: YES. + +FULL_PATH_NAMES = YES + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. 
+# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. +# This tag requires that the tag FULL_PATH_NAMES is set to YES. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used. Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful if your file system doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. + +JAVADOC_AUTOBRIEF = NO + +# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line +# such as +# /*************** +# as being the beginning of a Javadoc-style comment "banner". If set to NO, the +# Javadoc-style will behave just like regular comments and it will not be +# interpreted by doxygen. +# The default value is: NO. + +JAVADOC_BANNER = YES + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.)
+# The default value is: NO. + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //! or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description. Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO. + +MULTILINE_CPP_IS_BRIEF = NO + +# By default Python docstrings are displayed as preformatted text and doxygen's +# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the +# doxygen's special commands can be used and the contents of the docstring +# documentation blocks is shown as doxygen documentation. +# The default value is: YES. + +PYTHON_DOCSTRING = YES + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new +# page for each member. If set to NO, the documentation of a member will be part +# of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 2 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. 
An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:^^" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". Note that you cannot put \n's in the value part of an alias +# to insert newlines (in the resulting output). You can put ^^ in the value part +# of an alias to insert a newline as if a physical newline was in the original +# file. When you need a literal { or } or , in the value part of an alias you +# have to escape them by means of a backslash (\), this can lead to conflicts +# with the commands \{ and \} for these it is advised to use the version @{ and +# @} or use a double escape (\\{ and \\}) + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice +# sources only. 
Doxygen will then generate output that is more tailored for that +# language. For instance, namespaces will be presented as modules, types will be +# separated into more groups, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_SLICE = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, JavaScript, +# Csharp (C#), C, C++, Lex, D, PHP, md (Markdown), Objective-C, Python, Slice, +# VHDL, Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: +# FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser +# tries to guess whether the code is fixed or free formatted code, this is the +# default for Fortran type files). For instance to make doxygen treat .inc files +# as Fortran files (default is PHP), and .f files as C (default is Fortran), +# use: inc=Fortran f=C. +# +# Note: For files without extension you can use no_extension as a placeholder. +# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen. When specifying no_extension you should add +# * to the FILE_PATTERNS. +# +# Note: see also the list of default file extension mappings. + +EXTENSION_MAPPING = + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See https://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibility issues. +# The default value is: YES.
+ +MARKDOWN_SUPPORT = YES + +# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up +# to that level are automatically included in the table of contents, even if +# they do not have an id attribute. +# Note: This feature currently applies only to Markdown headings. +# Minimum value: 0, maximum value: 99, default value: 5. +# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. + +TOC_INCLUDE_HEADINGS = 5 + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by putting a % sign in front of the word or +# globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES. + +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match function declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. +# The default value is: NO. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property.
Setting this option to YES will make +# doxygen replace the get and set methods by a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type. If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO. + +DISTRIBUTE_GROUP_DOC = YES + +# If one adds a struct or class to a group and this option is enabled, then also +# any nested class or struct is added to the same group. By default this option +# is disabled and one has to add nested compounds explicitly via \ingroup. +# The default value is: NO. + +GROUP_NESTED_COMPOUNDS = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. +# The default value is: NO.
+ +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. + +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. 
+ +LOOKUP_CACHE_SIZE = 0 + +# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use +# during processing. When set to 0 doxygen will base this on the number of +# cores available in the system. You can set it explicitly to a value larger +# than 0 to get more control over the balance between CPU load and processing +# speed. At this moment only the input processing can be done using multiple +# threads. Since this is still an experimental feature the default is set to 1, +# which effectively disables parallel processing. Please report any issues you +# encounter. Generating dot graphs in parallel is controlled by the +# DOT_NUM_THREADS setting. +# Minimum value: 0, maximum value: 32, default value: 1. + +NUM_PROC_THREADS = 1 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in +# documentation are documented, even if no documentation was available. Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. +# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will +# be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual +# methods of a class will be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIV_VIRTUAL = NO + +# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal +# scope will be included in the documentation. +# The default value is: NO.
+ +EXTRACT_PACKAGE = YES + +# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be +# included in the documentation. +# The default value is: NO. + +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO, +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. If set to YES, local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO, only methods in the interface are +# included. +# The default value is: NO. + +EXTRACT_LOCAL_METHODS = YES + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. By default anonymous namespace +# are hidden. +# The default value is: NO. + +EXTRACT_ANON_NSPACES = YES + +# If this flag is set to YES, the name of an unnamed parameter in a declaration +# will be determined by the corresponding definition. By default unnamed +# parameters remain unnamed in the output. +# The default value is: YES. + +RESOLVE_UNNAMED_PARAMS = YES + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. 
If set +# to NO, these classes will be included in the various overviews. This option +# will also hide undocumented C++ concepts if enabled. This option has no effect +# if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# declarations. If set to NO, these declarations will be included in the +# documentation. +# The default value is: NO. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO, these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. + +INTERNAL_DOCS = NO + +# With the correct setting of option CASE_SENSE_NAMES doxygen will better be +# able to match the capabilities of the underlying filesystem. In case the +# filesystem is case sensitive (i.e. it supports files in the same directory +# whose names only differ in casing), the option must be set to YES to properly +# deal with such files in case they appear in the input. For filesystems that +# are not case sensitive the option should be set to NO to properly deal with +# output files written for symbols that only differ in casing, such as for two +# classes, one named CLASS and the other named Class, and to also support +# references to files without having to specify the exact matching casing. On +# Windows (including Cygwin) and MacOS, users should typically set this option +# to NO, whereas on Linux or other Unix flavors it should typically be set to +# YES. +# Possible values are: SYSTEM, NO and YES. 
+# The default value is: SYSTEM. + +CASE_SENSE_NAMES = SYSTEM + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. If set to YES, the +# scope will be hidden. +# The default value is: NO. + +HIDE_SCOPE_NAMES = NO + +# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will +# append additional text to a page's title, such as Class Reference. If set to +# YES the compound reference will be hidden. +# The default value is: NO. + +HIDE_COMPOUND_REFERENCE= NO + +# If the SHOW_HEADERFILE tag is set to YES then the documentation for a class +# will show which file needs to be included to use the class. +# The default value is: YES. + +SHOW_HEADERFILE = YES + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. + +SHOW_INCLUDE_FILES = YES + +# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each +# grouped member an include statement to the documentation, telling the reader +# which file to include in order to use the member. +# The default value is: NO. + +SHOW_GROUPED_MEMB_INC = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. +# The default value is: YES. 
+ +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO, the members will appear in declaration order. Note that +# this will also influence the order of the classes in the class list. +# The default value is: NO. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. +# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. 
+ +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo +# list. This list is created by putting \todo commands in the documentation. +# The default value is: YES. + +GENERATE_TODOLIST = NO + +# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test +# list. This list is created by putting \test commands in the documentation. +# The default value is: YES. + +GENERATE_TESTLIST = NO + +# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug +# list. This list is created by putting \bug commands in the documentation. +# The default value is: YES. + +GENERATE_BUGLIST = NO + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) +# the deprecated list. This list is created by putting \deprecated commands in +# the documentation. +# The default value is: YES. + +GENERATE_DEPRECATEDLIST= NO + +# The ENABLED_SECTIONS tag can be used to enable conditional documentation +# sections, marked by \if ... \endif and \cond +# ... \endcond blocks. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the +# initial value of a variable or macro / define can have for it to appear in the +# documentation. If the initializer consists of more lines than specified here +# it will be hidden. Use a value of 0 to hide initializers completely. 
The +# appearance of the value of individual variables and macros / defines can be +# controlled using \showinitializer or \hideinitializer command in the +# documentation regardless of this setting. +# Minimum value: 0, maximum value: 10000, default value: 30. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at +# the bottom of the documentation of classes and structs. If set to YES, the +# list will mention the files that were used to generate the documentation. +# The default value is: YES. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This +# will remove the Files entry from the Quick Index and from the Folder Tree View +# (if specified). +# The default value is: YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces +# page. This will remove the Namespaces entry from the Quick Index and from the +# Folder Tree View (if specified). +# The default value is: YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command command input-file, where command is the value of the +# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided +# by doxygen. Whatever the program writes to standard output is used as the file +# version. For an example see the documentation. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. 
You can +# optionally specify a file name after the option, if omitted DoxygenLayout.xml +# will be used as the name of the layout file. See also section "Changing the +# layout of pages" for information. +# +# Note that if you run doxygen from a directory containing a file called +# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE +# tag is left empty. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. +# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the +# search path. See also \cite for info how to create references. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = YES + +# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. 
+ +WARN_IF_UNDOCUMENTED = YES + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as documenting some parameters in +# a documented function twice, or documenting parameters that don't exist or +# using markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = YES + +# If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete +# function parameter documentation. If set to NO, doxygen will accept that some +# parameters have no documentation without warning. +# The default value is: YES. + +WARN_IF_INCOMPLETE_DOC = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. If set to NO, doxygen will only warn about wrong parameter +# documentation, but not about the absence of documentation. If EXTRACT_ALL is +# set to YES then this flag will automatically be disabled. See also +# WARN_IF_INCOMPLETE_DOC +# The default value is: NO. + +WARN_NO_PARAMDOC = YES + +# If WARN_IF_UNDOC_ENUM_VAL option is set to YES, doxygen will warn about +# undocumented enumeration values. If set to NO, doxygen will accept +# undocumented enumeration values. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: NO. + +WARN_IF_UNDOC_ENUM_VAL = NO + +# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when +# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS +# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but +# at the end of the doxygen process doxygen will return with a non-zero status. +# Possible values are: NO, YES and FAIL_ON_WARNINGS. +# The default value is: NO. + +WARN_AS_ERROR = NO + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. 
The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# See also: WARN_LINE_FORMAT +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line: $text" + +# In the $text part of the WARN_FORMAT command it is possible that a reference +# to a more specific place is given. To make it easier to jump to this place +# (outside of doxygen) the user can define a custom "cut" / "paste" string. +# Example: +# WARN_LINE_FORMAT = "'vi $file +$line'" +# See also: WARN_FORMAT +# The default value is: at line $line of file $file. + +WARN_LINE_FORMAT = "at line $line of file $file" + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). In case the file specified cannot be opened for writing the +# warning and error messages are written to standard error. When as file - is +# specified the warning and error messages are written to standard output +# (stdout). + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. Separate the files or directories with +# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING +# Note: If this tag is empty the current directory is searched. 
+ +INPUT = README.md cpp/roaring.hh cpp/roaring64map.hh + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: +# https://www.gnu.org/software/libiconv/) for the list of possible encodings. +# See also: INPUT_FILE_ENCODING +# The default value is: UTF-8. + +INPUT_ENCODING = UTF-8 + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. The INPUT_FILE_ENCODING tag can be used to specify +# character encoding on a per file pattern basis. Doxygen will compare the file +# name with each pattern and apply the encoding (instead of the default +# INPUT_ENCODING) if there is a match. The character encodings are a list of the +# form: pattern=encoding (like *.php=ISO-8859-1). See +# INPUT_ENCODING for further information on supported encodings. + +INPUT_FILE_ENCODING = + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# read by doxygen. +# +# Note the list of default checked file patterns might differ from the list of +# default file extension mappings. +# +# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, +# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, +# *.hh, *.hxx, *.hpp, *.h++, *.l, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, +# *.inc, *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C +# comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, +# *.vhdl, *.ucf, *.qsf and *.ice.
+ +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.c++ \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.idl \ + *.ddl \ + *.odl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.l \ + *.cs \ + *.d \ + *.php \ + *.php4 \ + *.php5 \ + *.phtml \ + *.inc \ + *.m \ + *.markdown \ + *.md \ + *.mm \ + *.dox \ + *.py \ + *.pyw \ + *.f90 \ + *.f95 \ + *.f03 \ + *.f08 \ + *.f18 \ + *.f \ + *.for \ + *.vhd \ + *.vhdl \ + *.ucf \ + *.qsf \ + *.ice + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. +# The default value is: NO. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = benchmarks, tests, Testing, tools, build, docs + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. +# The default value is: NO. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories for example use the pattern */test/* + +EXCLUDE_PATTERNS = "*/test/*" + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. 
Examples: ANamespace, AClass, +# ANamespace::AClass, ANamespace::*Test +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories use the pattern */test/* + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or directories +# that contain example code fragments that are included (see the \include +# command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank all +# files are included. + +EXAMPLE_PATTERNS = * + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude commands +# irrespective of the value of the RECURSIVE tag. +# The default value is: NO. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or directories +# that contain images that are to be included in the documentation (see the +# \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command: +# +# <filter> <input-file> +# +# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the +# name of an input file. Doxygen will then use the output that the filter +# program writes to standard output. If FILTER_PATTERNS is specified, this tag +# will be ignored. +# +# Note that the filter must not add or remove lines; it is applied before the +# code is scanned, but not when the output code is generated. If lines are added +# or removed, the anchors will not be placed correctly.
+# +# Note that doxygen will use the data processed and written to standard output +# for further processing, therefore nothing else, like debug statements or used +# commands (so in case of a Windows batch file always use @echo OFF), should be +# written to standard output. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: pattern=filter +# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how +# filters are used. If the FILTER_PATTERNS tag is empty or if none of the +# patterns match the file name, INPUT_FILTER is applied. +# +# Note that for custom extensions or not directly supported extensions you also +# need to set EXTENSION_MAPPING for the extension otherwise the files are not +# properly processed by doxygen. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will also be used to filter the input files that are used for +# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). +# The default value is: NO. + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and +# it is also possible to disable source filtering for a specific pattern using +# *.ext= (so without naming a filter). +# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. 
+ +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html). This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. + +USE_MDFILE_AS_MAINPAGE = README.md + +# The Fortran standard specifies that for fixed formatted Fortran code all +# characters from position 72 are to be considered as comment. A common +# extension is to allow longer lines before the automatic comment starts. The +# setting FORTRAN_COMMENT_AFTER will also make it possible that longer lines can +# be processed before the automatic comment starts. +# Minimum value: 7, maximum value: 10000, default value: 72. + +FORTRAN_COMMENT_AFTER = 72 + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will be +# generated. Documented entities will be cross-referenced with these sources. +# +# Note: To get rid of all source code in the generated output, make sure that +# also VERBATIM_HEADERS is set to NO. +# The default value is: NO. + +SOURCE_BROWSER = YES + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. +# The default value is: YES. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# entity all documented functions referencing it will be listed. 
+# The default value is: NO. + +REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed. +# The default value is: NO. + +REFERENCES_RELATION = YES + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will +# link to the documentation. +# The default value is: YES. + +REFERENCES_LINK_SOURCE = NO + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen built-in +# source browser. The htags tool is part of GNU's global source tagging system +# (see https://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. +# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. 
+# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified. Set to NO to disable this. +# See also: Section \class. +# The default value is: YES. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES. + +ALPHABETICAL_INDEX = YES + +# The IGNORE_PREFIX tag can be used to specify a prefix (or a list of prefixes) +# that should be ignored while generating the index headers. The IGNORE_PREFIX +# tag works for classes, function and member names. The entity will be placed in +# the alphabetical list under the first letter of the entity name that remains +# after removing the prefix. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output +# The default value is: YES. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. +# This tag requires that the tag GENERATE_HTML is set to YES.
+ +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. If the tag is left blank doxygen will generate a +# standard header. +# +# To get valid HTML the header file that includes any scripts and style sheets +# that doxygen needs, which is dependent on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. 
If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses. +# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined +# cascading style sheets that are included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. +# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefore more robust against future updates. +# Doxygen will copy the style sheet files to the output directory. +# Note: The order of the extra style sheet files is of importance (e.g. the last +# style sheet in the list overrules the setting of the previous ones in the +# list). +# Note: Since the styling of scrollbars can currently not be overruled in +# Webkit/Chromium, the styling will be left out of the default doxygen.css if +# one or more extra stylesheets have been specified. So if scrollbar +# customization is desired it has to be added explicitly. For an example see the +# documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_STYLESHEET = theme/doxygen-awesome.css \ + theme/doxygen-awesome-sidebar-only.css \ + theme/doxygen-awesome-sidebar-only-darkmode-toggle.css + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. 
Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_FILES = theme/doxygen-awesome-darkmode-toggle.js \ + theme/doxygen-awesome-interactive-toc.js \ + theme/doxygen-awesome-fragment-copy-button.js \ + theme/doxygen-awesome-paragraph-link.js + +# The HTML_COLORSTYLE tag can be used to specify if the generated HTML output +# should be rendered with a dark or light theme. +# Possible values are: LIGHT always generate light mode output, DARK always +# generate dark mode output, AUTO_LIGHT automatically set the mode according to +# the user preference, use light mode if no preference is set (the default), +# AUTO_DARK automatically set the mode according to the user preference, use +# dark mode if no preference is set and TOGGLE allow to user to switch between +# light and dark mode via a button. +# The default value is: AUTO_LIGHT. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE = LIGHT + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the style sheet and background images according to +# this color. Hue is specified as an angle on a color-wheel, see +# https://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_HUE = 209 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use gray-scales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +HTML_COLORSTYLE_SAT = 255 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. +# Minimum value: 40, maximum value: 240, default value: 80. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 113 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to YES can help to show when doxygen was last run and thus if the +# documentation is up to date. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = NO + +# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML +# documentation will contain a main index with vertical navigation menus that +# are dynamically created via JavaScript. If disabled, the navigation index will +# consist of multiple levels of tabs that are statically embedded in every HTML +# page. Disable this option to support browsers that do not have JavaScript, +# like the Qt help browser. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_MENUS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES.
+ +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount). So setting the number of +# entries 1 will produce a full collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a full expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: +# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To +# create a documentation set, doxygen will generate a Makefile in the HTML +# output directory. Running make will produce the docset in that directory and +# running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy +# genXcode/_index.html for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag determines the URL of the docset feed. 
A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDURL = + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# on Windows. In the beginning of 2021 Microsoft took the original page, with +# a.o. the download links, offline (the HTML help workshop was already many years +# in maintenance mode). You can download the HTML help workshop from the web +# archives at Installation executable (see: +# http://web.archive.org/web/20160201063255/http://download.microsoft.com/downlo +# ad/0/A/9/0A939EF6-E31C-430F-A3DF-DFAE7960D564/htmlhelp.exe).
+# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file. You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler (hhc.exe). If non-empty, +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +HHC_LOCATION = + +# The GENERATE_CHI flag controls if a separate .chi index file is generated +# (YES) or that it should be included in the main .chm file (NO). +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) +# and project file content. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_INDEX_ENCODING = + +# The BINARY_TOC flag controls whether a binary table of contents is generated +# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it +# enables the Previous and Next buttons. +# The default value is: NO. 
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. For more information please see Qt Help Project / Namespace +# (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders). +# The default value is: doc. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. 
For more information please see Qt Help Project / Custom +# Filters (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see Qt Help Project / Custom +# Filters (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location (absolute path +# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to +# run qhelpgenerator on the generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated, together with the HTML files, they form an Eclipse help plugin. To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files needs +# to be copied into the plugins directory of eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. 
When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can +# further fine tune the look of the index (see "Fine-tuning the output"). As an +# example, the default style sheet generated by doxygen has an example that +# shows how to put an image at the root of the tree instead of the PROJECT_NAME. +# Since the tree basically has the same information as the tab index, you could +# consider setting DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +GENERATE_TREEVIEW = YES + +# When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the +# FULL_SIDEBAR option determines if the side bar is limited to only the treeview +# area (value NO) or if it should extend to the full height of the window (value +# YES). Setting this to YES gives a layout similar to +# https://docs.readthedocs.io with more room for contents, but less room for the +# project logo, title, and description. If either GENERATE_TREEVIEW or +# DISABLE_INDEX is set to NO, this option has no effect. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FULL_SIDEBAR = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. +# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. +# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. +# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +EXT_LINKS_IN_WINDOW = NO + +# If the OBFUSCATE_EMAILS tag is set to YES, doxygen will obfuscate email +# addresses. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +OBFUSCATE_EMAILS = YES + +# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg +# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see +# https://inkscape.org) to generate formulas as SVG images instead of PNGs for +# the HTML output. These images will generally look nicer at scaled resolutions. +# Possible values are: png (the default) and svg (looks nicer but requires the +# pdf2svg or inkscape tool). +# The default value is: png. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FORMULA_FORMAT = png + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_FONTSIZE = 10 + +# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands +# to create new LaTeX commands to be used in formulas as building blocks. See +# the section "Including formulas" for details. + +FORMULA_MACROFILE = + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see +# https://www.mathjax.org) which uses client side JavaScript for the rendering +# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX +# installed or if you want formulas to look prettier in the HTML output. When +# enabled you may also need to install MathJax separately and configure the path +# to it using the MATHJAX_RELPATH option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +USE_MATHJAX = NO + +# With MATHJAX_VERSION it is possible to specify the MathJax version to be used. 
+# Note that the different versions of MathJax have different requirements with +# regards to the different settings, so it is possible that also other MathJax +# settings have to be changed when switching between the different MathJax +# versions. +# Possible values are: MathJax_2 and MathJax_3. +# The default value is: MathJax_2. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_VERSION = MathJax_2 + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. For more details about the output format see MathJax +# version 2 (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) and MathJax version 3 +# (see: +# http://docs.mathjax.org/en/latest/web/components/output.html). +# Possible values are: HTML-CSS (which is slower, but has the best +# compatibility. This is the name for MathJax version 2, for MathJax version 3 +# this will be translated into chtml), NativeMML (i.e. MathML. Only supported +# for MathJax 2. For MathJax version 3 chtml will be used instead.), chtml (This +# is the name for MathJax version 3, for MathJax version 2 this will be +# translated into HTML-CSS) and SVG. +# The default value is: HTML-CSS. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the HTML +# output directory using the MATHJAX_RELPATH option. The destination directory +# should contain the MathJax.js script. For instance, if the mathjax directory +# is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax +# Content Delivery Network so you can quickly see the result without installing +# MathJax. However, it is strongly recommended to install a local copy of +# MathJax from https://www.mathjax.org before deployment. 
The default value is: +# - in case of MathJax version 2: https://cdn.jsdelivr.net/npm/mathjax@2 +# - in case of MathJax version 3: https://cdn.jsdelivr.net/npm/mathjax@3 +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_RELPATH = + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax +# extension names that should be enabled during MathJax rendering. For example +# for MathJax version 2 (see +# https://docs.mathjax.org/en/v2.7-latest/tex.html#tex-and-latex-extensions): +# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# For example for MathJax version 3 (see +# http://docs.mathjax.org/en/latest/input/tex/extensions/index.html): +# MATHJAX_EXTENSIONS = ams +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_EXTENSIONS = + +# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces +# of code that will be used on startup of the MathJax code. See the MathJax site +# (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an +# example see the documentation. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_CODEFILE = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for +# the HTML output. The underlying search engine uses javascript and DHTML and +# should work on any modern browser. Note that when using HTML help +# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) +# there is already a search function so this one should typically be disabled. +# For large projects the javascript based search engine can be slow, then +# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to +# search using the keyboard; to jump to the search box use <access key> + S +# (what the <access key> is depends on the OS and browser, but it is typically +# <CTRL>, <ALT>/<option>