From f00a87c22f0207ace3e0ca1d1286d256e2aa2aa1 Mon Sep 17 00:00:00 2001 From: "Shane Michael Mathews (Brandwatch Account)" <21181346+smmathews-bw-boston@users.noreply.github.com> Date: Tue, 10 Oct 2023 08:19:24 -0400 Subject: [PATCH] upgrade CRoaring to v2.0.2 (#1) * upgrade to v2.0.2 * upgrade c version to c17 (fixes 'Unknown atomic implementation') * X_CONTAINER_TYPE_CODE was replaced with X_CONTAINER_TYPE * use the global memory hook instead of redefining the memory allocation interface * fix mismatched type signatures --- Makefile | 2 +- Makefile_native | 2 +- roaring.c | 30564 +++++++++++++++++++++++++------------- roaring.h | 7935 ++-------- roaring_buffer_reader.c | 6 +- roaringbitmap.c | 52 + roaringbitmap.h | 50 - 7 files changed, 21470 insertions(+), 17141 deletions(-) diff --git a/Makefile b/Makefile index 9a58899..ea92f73 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ REGRESS = $(patsubst sql/%.sql,%,$(TESTS)) MODULE_big = roaringbitmap OBJS = roaringbitmap.o -roaringbitmap.o: override CFLAGS += -std=c99 -Wno-error=maybe-uninitialized \ +roaringbitmap.o: override CFLAGS += -std=c17 -Wno-error=maybe-uninitialized \ -Wno-declaration-after-statement -Wno-missing-prototypes PG_CONFIG = pg_config diff --git a/Makefile_native b/Makefile_native index d808360..97bebef 100644 --- a/Makefile_native +++ b/Makefile_native @@ -5,7 +5,7 @@ REGRESS = $(patsubst sql/%.sql,%,$(TESTS)) MODULE_big = roaringbitmap OBJS = roaringbitmap.o -roaringbitmap.o: override CFLAGS += -march=native -std=c99 -Wno-error=maybe-uninitialized \ +roaringbitmap.o: override CFLAGS += -march=native -std=c17 -Wno-error=maybe-uninitialized \ -Wno-declaration-after-statement -Wno-missing-prototypes PG_CONFIG = pg_config diff --git a/roaring.c b/roaring.c index 02c3e4c..d93d861 100644 --- a/roaring.c +++ b/roaring.c @@ -1,4 +1,59 @@ -/* auto-generated on Sat Jun 27 12:40:38 2020. Do not edit! */ +// !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! 
+// Created by amalgamation.sh on 2023-09-27T16:30:23Z + +/* + * The CRoaring project is under a dual license (Apache/MIT). + * Users of the library may choose one or the other license. + */ +/* + * Copyright 2016-2022 The CRoaring authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ +/* + * MIT License + * + * Copyright 2016-2022 The CRoaring authors + * + * Permission is hereby granted, free of charge, to any + * person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the + * Software without restriction, including without + * limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software + * is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice + * shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF + * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED + * TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT + * SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * SPDX-License-Identifier: MIT + */ + #include "roaring.h" /* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */ @@ -6,3065 +61,1512 @@ #include "dmalloc.h" #endif -/* begin file src/array_util.c */ -#include -#include -#include -#include -#include -#include - -extern inline int32_t binarySearch(const uint16_t *array, int32_t lenarray, - uint16_t ikey); - -#ifdef USESSE4 -// used by intersect_vector16 -ALIGNED(0x1000) -static const uint8_t shuffle_mask16[] = { - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 
4, 5, - 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, - 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 10, 11, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, - 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 0xFF, 0xFF, 0xFF, 0xFF, 12, 13, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 8, 9, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, - 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, - 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 10, 11, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, - 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 8, 9, 10, 11, 12, 13, 0xFF, 
0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, - 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, - 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, - 14, 
15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 
0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, - 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, - 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 10, 11, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, - 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9, - 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, - 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, - 8, 9, 12, 13, 14, 15, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 6, 7, 8, 9, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 10, 11, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 4, 5, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 2, 3, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, - 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 12, 13, - 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, - 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 
- 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, - 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 2, 3, 4, 5, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15}; - -/** - * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions - * Optimized by D. Lemire on May 3rd 2013 - */ -int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, - const uint16_t *__restrict__ B, size_t s_b, - uint16_t *C) { - size_t count = 0; - size_t i_a = 0, i_b = 0; - const int vectorlength = sizeof(__m128i) / sizeof(uint16_t); - const size_t st_a = (s_a / vectorlength) * vectorlength; - const size_t st_b = (s_b / vectorlength) * vectorlength; - __m128i v_a, v_b; - if ((i_a < st_a) && (i_b < st_b)) { - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - while ((A[i_a] == 0) || (B[i_b] == 0)) { - const __m128i res_v = _mm_cmpestrm( - v_b, vectorlength, v_a, vectorlength, - _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); - const int r = _mm_extract_epi32(res_v, 0); - __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + r); - __m128i p = _mm_shuffle_epi8(v_a, sm16); - _mm_storeu_si128((__m128i *)&C[count], p); // can overflow - count += _mm_popcnt_u32(r); - const uint16_t a_max = A[i_a + vectorlength - 1]; - const uint16_t b_max = B[i_b + vectorlength - 1]; - if (a_max <= b_max) { - i_a += vectorlength; - if (i_a == st_a) break; - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - } - if (b_max <= a_max) { - i_b += 
vectorlength; - if (i_b == st_b) break; - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - } - } - if ((i_a < st_a) && (i_b < st_b)) - while (true) { - const __m128i res_v = _mm_cmpistrm( - v_b, v_a, - _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); - const int r = _mm_extract_epi32(res_v, 0); - __m128i sm16 = - _mm_load_si128((const __m128i *)shuffle_mask16 + r); - __m128i p = _mm_shuffle_epi8(v_a, sm16); - _mm_storeu_si128((__m128i *)&C[count], p); // can overflow - count += _mm_popcnt_u32(r); - const uint16_t a_max = A[i_a + vectorlength - 1]; - const uint16_t b_max = B[i_b + vectorlength - 1]; - if (a_max <= b_max) { - i_a += vectorlength; - if (i_a == st_a) break; - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - } - if (b_max <= a_max) { - i_b += vectorlength; - if (i_b == st_b) break; - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - } - } - } - // intersect the tail using scalar intersection - while (i_a < s_a && i_b < s_b) { - uint16_t a = A[i_a]; - uint16_t b = B[i_b]; - if (a < b) { - i_a++; - } else if (b < a) { - i_b++; - } else { - C[count] = a; //==b; - count++; - i_a++; - i_b++; - } - } - return (int32_t)count; -} - -int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, - size_t s_a, - const uint16_t *__restrict__ B, - size_t s_b) { - size_t count = 0; - size_t i_a = 0, i_b = 0; - const int vectorlength = sizeof(__m128i) / sizeof(uint16_t); - const size_t st_a = (s_a / vectorlength) * vectorlength; - const size_t st_b = (s_b / vectorlength) * vectorlength; - __m128i v_a, v_b; - if ((i_a < st_a) && (i_b < st_b)) { - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - while ((A[i_a] == 0) || (B[i_b] == 0)) { - const __m128i res_v = _mm_cmpestrm( - v_b, vectorlength, v_a, vectorlength, - _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); - const int r = _mm_extract_epi32(res_v, 0); - count += _mm_popcnt_u32(r); - const uint16_t a_max = A[i_a + vectorlength - 1]; - const uint16_t b_max = B[i_b 
+ vectorlength - 1]; - if (a_max <= b_max) { - i_a += vectorlength; - if (i_a == st_a) break; - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - } - if (b_max <= a_max) { - i_b += vectorlength; - if (i_b == st_b) break; - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - } - } - if ((i_a < st_a) && (i_b < st_b)) - while (true) { - const __m128i res_v = _mm_cmpistrm( - v_b, v_a, - _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); - const int r = _mm_extract_epi32(res_v, 0); - count += _mm_popcnt_u32(r); - const uint16_t a_max = A[i_a + vectorlength - 1]; - const uint16_t b_max = B[i_b + vectorlength - 1]; - if (a_max <= b_max) { - i_a += vectorlength; - if (i_a == st_a) break; - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - } - if (b_max <= a_max) { - i_b += vectorlength; - if (i_b == st_b) break; - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - } - } - } - // intersect the tail using scalar intersection - while (i_a < s_a && i_b < s_b) { - uint16_t a = A[i_a]; - uint16_t b = B[i_b]; - if (a < b) { - i_a++; - } else if (b < a) { - i_b++; - } else { - count++; - i_a++; - i_b++; - } - } - return (int32_t)count; -} - -///////// -// Warning: -// This function may not be safe if A == C or B == C. -///////// -int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, - const uint16_t *__restrict__ B, size_t s_b, - uint16_t *C) { - // we handle the degenerate case - if (s_a == 0) return 0; - if (s_b == 0) { - if (A != C) memcpy(C, A, sizeof(uint16_t) * s_a); - return (int32_t)s_a; - } - // handle the leading zeroes, it is messy but it allows us to use the fast - // _mm_cmpistrm instrinsic safely - int32_t count = 0; - if ((A[0] == 0) || (B[0] == 0)) { - if ((A[0] == 0) && (B[0] == 0)) { - A++; - s_a--; - B++; - s_b--; - } else if (A[0] == 0) { - C[count++] = 0; - A++; - s_a--; - } else { - B++; - s_b--; - } - } - // at this point, we have two non-empty arrays, made of non-zero - // increasing values. 
- size_t i_a = 0, i_b = 0; - const size_t vectorlength = sizeof(__m128i) / sizeof(uint16_t); - const size_t st_a = (s_a / vectorlength) * vectorlength; - const size_t st_b = (s_b / vectorlength) * vectorlength; - if ((i_a < st_a) && (i_b < st_b)) { // this is the vectorized code path - __m128i v_a, v_b; //, v_bmax; - // we load a vector from A and a vector from B - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - // we have a runningmask which indicates which values from A have been - // spotted in B, these don't get written out. - __m128i runningmask_a_found_in_b = _mm_setzero_si128(); - /**** - * start of the main vectorized loop - *****/ - while (true) { - // afoundinb will contain a mask indicate for each entry in A - // whether it is seen - // in B - const __m128i a_found_in_b = - _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | - _SIDD_BIT_MASK); - runningmask_a_found_in_b = - _mm_or_si128(runningmask_a_found_in_b, a_found_in_b); - // we always compare the last values of A and B - const uint16_t a_max = A[i_a + vectorlength - 1]; - const uint16_t b_max = B[i_b + vectorlength - 1]; - if (a_max <= b_max) { - // Ok. In this code path, we are ready to write our v_a - // because there is no need to read more from B, they will - // all be large values. 
- const int bitmask_belongs_to_difference = - _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF; - /*** next few lines are probably expensive *****/ - __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + - bitmask_belongs_to_difference); - __m128i p = _mm_shuffle_epi8(v_a, sm16); - _mm_storeu_si128((__m128i *)&C[count], p); // can overflow - count += _mm_popcnt_u32(bitmask_belongs_to_difference); - // we advance a - i_a += vectorlength; - if (i_a == st_a) // no more - break; - runningmask_a_found_in_b = _mm_setzero_si128(); - v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); - } - if (b_max <= a_max) { - // in this code path, the current v_b has become useless - i_b += vectorlength; - if (i_b == st_b) break; - v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); - } - } - // at this point, either we have i_a == st_a, which is the end of the - // vectorized processing, - // or we have i_b == st_b, and we are not done processing the vector... - // so we need to finish it off. - if (i_a < st_a) { // we have unfinished business... 
- uint16_t buffer[8]; // buffer to do a masked load - memset(buffer, 0, 8 * sizeof(uint16_t)); - memcpy(buffer, B + i_b, (s_b - i_b) * sizeof(uint16_t)); - v_b = _mm_lddqu_si128((__m128i *)buffer); - const __m128i a_found_in_b = - _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | - _SIDD_BIT_MASK); - runningmask_a_found_in_b = - _mm_or_si128(runningmask_a_found_in_b, a_found_in_b); - const int bitmask_belongs_to_difference = - _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF; - __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + - bitmask_belongs_to_difference); - __m128i p = _mm_shuffle_epi8(v_a, sm16); - _mm_storeu_si128((__m128i *)&C[count], p); // can overflow - count += _mm_popcnt_u32(bitmask_belongs_to_difference); - i_a += vectorlength; - } - // at this point we should have i_a == st_a and i_b == st_b - } - // do the tail using scalar code - while (i_a < s_a && i_b < s_b) { - uint16_t a = A[i_a]; - uint16_t b = B[i_b]; - if (b < a) { - i_b++; - } else if (a < b) { - C[count] = a; - count++; - i_a++; - } else { //== - i_a++; - i_b++; - } - } - if (i_a < s_a) { - if(C == A) { - assert((size_t)count <= i_a); - if((size_t)count < i_a) { - memmove(C + count, A + i_a, sizeof(uint16_t) * (s_a - i_a)); - } - } else { - for(size_t i = 0; i < (s_a - i_a); i++) { - C[count + i] = A[i + i_a]; - } - } - count += (int32_t)(s_a - i_a); - } - return count; -} - -#endif // USESSE4 +#include "roaring.h" /* include public API definitions */ +/* begin file include/roaring/isadetection.h */ +#ifndef ROARING_ISADETECTION_H +#define ROARING_ISADETECTION_H +#if defined(__x86_64__) || defined(_M_AMD64) // x64 -#ifdef USE_OLD_SKEW_INTERSECT -// TODO: given enough experience with the new skew intersect, drop the old one from the code base. +#ifndef CROARING_COMPILER_SUPPORTS_AVX512 +#ifdef __has_include +// We want to make sure that the AVX-512 functions are only built on compilers +// fully supporting AVX-512. 
+#if __has_include(<avx512vbmi2intrin.h>)
+#define CROARING_COMPILER_SUPPORTS_AVX512 1
+#endif // #if __has_include(<avx512vbmi2intrin.h>)
+#endif // #ifdef __has_include

-/* Computes the intersection between one small and one large set of uint16_t.
- * Stores the result into buffer and return the number of elements. */
-int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s,
-                                const uint16_t *large, size_t size_l,
-                                uint16_t *buffer) {
-    size_t pos = 0, idx_l = 0, idx_s = 0;
+// Visual Studio 2019 and up support AVX-512
+#ifdef _MSC_VER
+#if _MSC_VER >= 1920
+#define CROARING_COMPILER_SUPPORTS_AVX512 1
+#endif // #if _MSC_VER >= 1920
+#endif // #ifdef _MSC_VER

-    if (0 == size_s) {
-        return 0;
-    }
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#define CROARING_COMPILER_SUPPORTS_AVX512 0
+#endif // #ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#endif // #ifndef CROARING_COMPILER_SUPPORTS_AVX512

-    uint16_t val_l = large[idx_l], val_s = small[idx_s];
-
-    while (true) {
-        if (val_l < val_s) {
-            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
-            if (idx_l == size_l) break;
-            val_l = large[idx_l];
-        } else if (val_s < val_l) {
-            idx_s++;
-            if (idx_s == size_s) break;
-            val_s = small[idx_s];
-        } else {
-            buffer[pos++] = val_s;
-            idx_s++;
-            if (idx_s == size_s) break;
-            val_s = small[idx_s];
-            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
-            if (idx_l == size_l) break;
-            val_l = large[idx_l];
-        }
-    }
-    return (int32_t)pos;
-}
-#else // USE_OLD_SKEW_INTERSECT
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+enum {
+  ROARING_SUPPORTS_AVX2 = 1,
+  ROARING_SUPPORTS_AVX512 = 2,
+};
+int croaring_hardware_support(void);
+#ifdef __cplusplus
+} } }  // extern "C" { namespace roaring { namespace internal {
+#endif
+#endif // x64
+#endif // ROARING_ISADETECTION_H
+/* end file include/roaring/isadetection.h */
+/* begin file include/roaring/containers/perfparameters.h */
+#ifndef PERFPARAMETERS_H_
+#define PERFPARAMETERS_H_
+#include <stdbool.h>

-/**
-* 
Branchless binary search going after 4 values at once. -* Assumes that array is sorted. -* You have that array[*index1] >= target1, array[*index12] >= target2, ... -* except when *index1 = n, in which case you know that all values in array are -* smaller than target1, and so forth. -* It has logarithmic complexity. -*/ -static void binarySearch4(const uint16_t *array, int32_t n, uint16_t target1, - uint16_t target2, uint16_t target3, uint16_t target4, - int32_t *index1, int32_t *index2, int32_t *index3, - int32_t *index4) { - const uint16_t *base1 = array; - const uint16_t *base2 = array; - const uint16_t *base3 = array; - const uint16_t *base4 = array; - if (n == 0) - return; - while (n > 1) { - int32_t half = n >> 1; - base1 = (base1[half] < target1) ? &base1[half] : base1; - base2 = (base2[half] < target2) ? &base2[half] : base2; - base3 = (base3[half] < target3) ? &base3[half] : base3; - base4 = (base4[half] < target4) ? &base4[half] : base4; - n -= half; - } - *index1 = (int32_t)((*base1 < target1) + base1 - array); - *index2 = (int32_t)((*base2 < target2) + base2 - array); - *index3 = (int32_t)((*base3 < target3) + base3 - array); - *index4 = (int32_t)((*base4 < target4) + base4 - array); -} +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif /** -* Branchless binary search going after 2 values at once. -* Assumes that array is sorted. -* You have that array[*index1] >= target1, array[*index12] >= target2. -* except when *index1 = n, in which case you know that all values in array are -* smaller than target1, and so forth. -* It has logarithmic complexity. +During lazy computations, we can transform array containers into bitset +containers as +long as we can expect them to have ARRAY_LAZY_LOWERBOUND values. 
*/ -static void binarySearch2(const uint16_t *array, int32_t n, uint16_t target1, - uint16_t target2, int32_t *index1, int32_t *index2) { - const uint16_t *base1 = array; - const uint16_t *base2 = array; - if (n == 0) - return; - while (n > 1) { - int32_t half = n >> 1; - base1 = (base1[half] < target1) ? &base1[half] : base1; - base2 = (base2[half] < target2) ? &base2[half] : base2; - n -= half; - } - *index1 = (int32_t)((*base1 < target1) + base1 - array); - *index2 = (int32_t)((*base2 < target2) + base2 - array); -} - -/* Computes the intersection between one small and one large set of uint16_t. - * Stores the result into buffer and return the number of elements. - * Processes the small set in blocks of 4 values calling binarySearch4 - * and binarySearch2. This approach can be slightly superior to a conventional - * galloping search in some instances. - */ -int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s, - const uint16_t *large, size_t size_l, - uint16_t *buffer) { - size_t pos = 0, idx_l = 0, idx_s = 0; +enum { ARRAY_LAZY_LOWERBOUND = 1024 }; - if (0 == size_s) { - return 0; - } - int32_t index1 = 0, index2 = 0, index3 = 0, index4 = 0; - while ((idx_s + 4 <= size_s) && (idx_l < size_l)) { - uint16_t target1 = small[idx_s]; - uint16_t target2 = small[idx_s + 1]; - uint16_t target3 = small[idx_s + 2]; - uint16_t target4 = small[idx_s + 3]; - binarySearch4(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, target3, - target4, &index1, &index2, &index3, &index4); - if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) { - buffer[pos++] = target1; - } - if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) { - buffer[pos++] = target2; - } - if ((index3 + idx_l < size_l) && (large[idx_l + index3] == target3)) { - buffer[pos++] = target3; - } - if ((index4 + idx_l < size_l) && (large[idx_l + index4] == target4)) { - buffer[pos++] = target4; - } - idx_s += 4; - idx_l += index4; - } - if ((idx_s + 2 <= size_s) 
&& (idx_l < size_l)) { - uint16_t target1 = small[idx_s]; - uint16_t target2 = small[idx_s + 1]; - binarySearch2(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, &index1, - &index2); - if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) { - buffer[pos++] = target1; - } - if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) { - buffer[pos++] = target2; - } - idx_s += 2; - idx_l += index2; - } - if ((idx_s < size_s) && (idx_l < size_l)) { - uint16_t val_s = small[idx_s]; - int32_t index = binarySearch(large + idx_l, (int32_t)(size_l - idx_l), val_s); - if (index >= 0) - buffer[pos++] = val_s; - } - return (int32_t)pos; -} +/* default initial size of a run container + setting it to zero delays the malloc.*/ +enum { RUN_DEFAULT_INIT_SIZE = 0 }; +/* default initial size of an array container + setting it to zero delays the malloc */ +enum { ARRAY_DEFAULT_INIT_SIZE = 0 }; -#endif //USE_OLD_SKEW_INTERSECT +/* automatic bitset conversion during lazy or */ +#ifndef LAZY_OR_BITSET_CONVERSION +#define LAZY_OR_BITSET_CONVERSION true +#endif +/* automatically attempt to convert a bitset to a full run during lazy + * evaluation */ +#ifndef LAZY_OR_BITSET_CONVERSION_TO_FULL +#define LAZY_OR_BITSET_CONVERSION_TO_FULL true +#endif -// TODO: this could be accelerated, possibly, by using binarySearch4 as above. 
-int32_t intersect_skewed_uint16_cardinality(const uint16_t *small, - size_t size_s, - const uint16_t *large, - size_t size_l) { - size_t pos = 0, idx_l = 0, idx_s = 0; +/* automatically attempt to convert a bitset to a full run */ +#ifndef OR_BITSET_CONVERSION_TO_FULL +#define OR_BITSET_CONVERSION_TO_FULL true +#endif - if (0 == size_s) { - return 0; - } +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif - uint16_t val_l = large[idx_l], val_s = small[idx_s]; - - while (true) { - if (val_l < val_s) { - idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); - if (idx_l == size_l) break; - val_l = large[idx_l]; - } else if (val_s < val_l) { - idx_s++; - if (idx_s == size_s) break; - val_s = small[idx_s]; - } else { - pos++; - idx_s++; - if (idx_s == size_s) break; - val_s = small[idx_s]; - idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); - if (idx_l == size_l) break; - val_l = large[idx_l]; - } - } +#endif +/* end file include/roaring/containers/perfparameters.h */ +/* begin file include/roaring/containers/container_defs.h */ +/* + * container_defs.h + * + * Unlike containers.h (which is a file aggregating all the container includes, + * like array.h, bitset.h, and run.h) this is a file included BY those headers + * to do things like define the container base class `container_t`. 
+ */

-    return (int32_t)pos;
-}
+#ifndef INCLUDE_CONTAINERS_CONTAINER_DEFS_H_
+#define INCLUDE_CONTAINERS_CONTAINER_DEFS_H_

-bool intersect_skewed_uint16_nonempty(const uint16_t *small, size_t size_s,
-                                      const uint16_t *large, size_t size_l) {
-    size_t idx_l = 0, idx_s = 0;
+#ifdef __cplusplus
+#include <type_traits>  // used by casting helper for compile-time check
+#endif

-    if (0 == size_s) {
-        return false;
-    }
+// The preferences are a separate file to separate out tweakable parameters

-    uint16_t val_l = large[idx_l], val_s = small[idx_s];
-
-    while (true) {
-        if (val_l < val_s) {
-            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
-            if (idx_l == size_l) break;
-            val_l = large[idx_l];
-        } else if (val_s < val_l) {
-            idx_s++;
-            if (idx_s == size_s) break;
-            val_s = small[idx_s];
-        } else {
-            return true;
-        }
-    }
+#ifdef __cplusplus
+namespace roaring { namespace internal {  // No extern "C" (contains template)
+#endif

-    return false;
-}
-/**
- * Generic intersection function.
+/*
+ * Since roaring_array_t's definition is not opaque, the container type is
+ * part of the API. If it's not going to be `void*` then it needs a name, and
+ * expectations are to prefix C library-exported names with `roaring_` etc.
+ *
+ * Rather than force the whole codebase to use the name `roaring_container_t`,
+ * the few API appearances use the macro ROARING_CONTAINER_T. Those includes
+ * are prior to containers.h, so make a short private alias of `container_t`.
+ * Then undefine the awkward macro so it's not used any more than it has to be.
*/ -int32_t intersect_uint16(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB, uint16_t *out) { - const uint16_t *initout = out; - if (lenA == 0 || lenB == 0) return 0; - const uint16_t *endA = A + lenA; - const uint16_t *endB = B + lenB; - - while (1) { - while (*A < *B) { - SKIP_FIRST_COMPARE: - if (++A == endA) return (int32_t)(out - initout); - } - while (*A > *B) { - if (++B == endB) return (int32_t)(out - initout); - } - if (*A == *B) { - *out++ = *A; - if (++A == endA || ++B == endB) return (int32_t)(out - initout); - } else { - goto SKIP_FIRST_COMPARE; - } - } - return (int32_t)(out - initout); // NOTREACHED -} - -int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB) { - int32_t answer = 0; - if (lenA == 0 || lenB == 0) return 0; - const uint16_t *endA = A + lenA; - const uint16_t *endB = B + lenB; - - while (1) { - while (*A < *B) { - SKIP_FIRST_COMPARE: - if (++A == endA) return answer; - } - while (*A > *B) { - if (++B == endB) return answer; - } - if (*A == *B) { - ++answer; - if (++A == endA || ++B == endB) return answer; - } else { - goto SKIP_FIRST_COMPARE; - } - } - return answer; // NOTREACHED -} - +typedef ROARING_CONTAINER_T container_t; +#undef ROARING_CONTAINER_T -bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB) { - if (lenA == 0 || lenB == 0) return 0; - const uint16_t *endA = A + lenA; - const uint16_t *endB = B + lenB; - - while (1) { - while (*A < *B) { - SKIP_FIRST_COMPARE: - if (++A == endA) return false; - } - while (*A > *B) { - if (++B == endB) return false; - } - if (*A == *B) { - return true; - } else { - goto SKIP_FIRST_COMPARE; - } - } - return false; // NOTREACHED -} +/* + * See ROARING_CONTAINER_T for notes on using container_t as a base class. 
+ * This macro helps make the following pattern look nicer:
+ *
+ *     #ifdef __cplusplus
+ *     struct roaring_array_s : public container_t {
+ *     #else
+ *     struct roaring_array_s {
+ *     #endif
+ *         int32_t cardinality;
+ *         int32_t capacity;
+ *         uint16_t *array;
+ *     }
+ */
+#if defined(__cplusplus)
+#define STRUCT_CONTAINER(name) \
+    struct name : public container_t /* { ... } */
+#else
+#define STRUCT_CONTAINER(name) \
+    struct name /* { ... } */
+#endif

 /**
- * Generic intersection function.
+ * Since container_t* is not void* in C++, "dangerous" casts are not needed to
+ * downcast; only a static_cast<> is needed. Define a macro for static casting
+ * which helps make casts more visible, and catches problems at compile-time
+ * when building the C sources in C++ mode:
+ *
+ *     void some_func(container_t **c, ...) {  // double pointer, not single
+ *         array_container_t *ac1 = (array_container_t *)(c);  // uncaught!!
+ *
+ *         array_container_t *ac2 = CAST(array_container_t *, c)  // C++ errors
+ *         array_container_t *ac3 = CAST_array(c);  // shorthand for #2, errors
+ *     }
+ *
+ * Trickier to do is a cast from `container**` to `array_container_t**`. This
+ * needs a reinterpret_cast<>, which sacrifices safety...so a template is used
+ * leveraging <type_traits> to make sure it's legal in the C++ build.
 */
-size_t intersection_uint32(const uint32_t *A, const size_t lenA,
-                           const uint32_t *B, const size_t lenB,
-                           uint32_t *out) {
-    const uint32_t *initout = out;
-    if (lenA == 0 || lenB == 0) return 0;
-    const uint32_t *endA = A + lenA;
-    const uint32_t *endB = B + lenB;
-
-    while (1) {
-        while (*A < *B) {
-        SKIP_FIRST_COMPARE:
-            if (++A == endA) return (out - initout);
-        }
-        while (*A > *B) {
-            if (++B == endB) return (out - initout);
-        }
-        if (*A == *B) {
-            *out++ = *A;
-            if (++A == endA || ++B == endB) return (out - initout);
-        } else {
-            goto SKIP_FIRST_COMPARE;
-        }
-    }
-    return (out - initout);  // NOTREACHED
+#ifdef __cplusplus
+#define CAST(type,value) static_cast<type>(value)
+#define movable_CAST(type,value) movable_CAST_HELPER<type>(value)
+
+template <typename PPDerived, typename Base>
+PPDerived movable_CAST_HELPER(Base **ptr_to_ptr) {
+    typedef typename std::remove_pointer<PPDerived>::type PDerived;
+    typedef typename std::remove_pointer<PDerived>::type Derived;
+    static_assert(
+        std::is_base_of<Base, Derived>::value,
+        "use movable_CAST() for container_t** => xxx_container_t**"
+    );
+    return reinterpret_cast<PPDerived>(ptr_to_ptr);
 }
+#else
+#define CAST(type,value) ((type)value)
+#define movable_CAST(type, value) ((type)value)
+#endif
-size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
-                                const uint32_t *B, const size_t lenB) {
-    if (lenA == 0 || lenB == 0) return 0;
-    size_t card = 0;
-    const uint32_t *endA = A + lenA;
-    const uint32_t *endB = B + lenB;
-
-    while (1) {
-        while (*A < *B) {
-        SKIP_FIRST_COMPARE:
-            if (++A == endA) return card;
-        }
-        while (*A > *B) {
-            if (++B == endB) return card;
-        }
-        if (*A == *B) {
-            card++;
-            if (++A == endA || ++B == endB) return card;
-        } else {
-            goto SKIP_FIRST_COMPARE;
-        }
-    }
-    return card;  // NOTREACHED
-}
+// Use for converting e.g. an `array_container_t**` to a `container_t**`
+//
+#define movable_CAST_base(c) movable_CAST(container_t **, c)

-// can one vectorize the computation of the union? (Update: Yes! See
-size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
-                    size_t size_2, uint16_t *buffer) {
-    size_t pos = 0, idx_1 = 0, idx_2 = 0;
+#ifdef __cplusplus
+} }  // namespace roaring { namespace internal {
+#endif

-    if (0 == size_2) {
-        memmove(buffer, set_1, size_1 * sizeof(uint16_t));
-        return size_1;
-    }
-    if (0 == size_1) {
-        memmove(buffer, set_2, size_2 * sizeof(uint16_t));
-        return size_2;
-    }
+#endif /* INCLUDE_CONTAINERS_CONTAINER_DEFS_H_ */
+/* end file include/roaring/containers/container_defs.h */
+/* begin file include/roaring/array_util.h */
+#ifndef ARRAY_UTIL_H
+#define ARRAY_UTIL_H

-    uint16_t val_1 = set_1[idx_1], val_2 = set_2[idx_2];
-
-    while (true) {
-        if (val_1 < val_2) {
-            buffer[pos++] = val_1;
-            ++idx_1;
-            if (idx_1 >= size_1) break;
-            val_1 = set_1[idx_1];
-        } else if (val_2 < val_1) {
-            buffer[pos++] = val_2;
-            ++idx_2;
-            if (idx_2 >= size_2) break;
-            val_2 = set_2[idx_2];
-        } else {
-            buffer[pos++] = val_1;
-            ++idx_1;
-            ++idx_2;
-            if (idx_1 >= size_1 || idx_2 >= size_2) break;
-            val_1 = set_1[idx_1];
-            val_2 = set_2[idx_2];
-        }
-    }
+#include <stddef.h>  // for size_t
+#include <stdint.h>

-    if (idx_1 < size_1) {
-        const size_t n_elems = size_1 - idx_1;
-        memmove(buffer + pos, set_1 + idx_1, n_elems * sizeof(uint16_t));
-        pos += n_elems;
-    } else if (idx_2 < size_2) {
-        const size_t n_elems = size_2 - idx_2;
-        memmove(buffer + pos, set_2 + idx_2, n_elems * sizeof(uint16_t));
-        pos += n_elems;
-    }
-    return pos;
-}
+#if CROARING_IS_X64
+#ifndef CROARING_COMPILER_SUPPORTS_AVX512
+#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
+#endif // CROARING_COMPILER_SUPPORTS_AVX512 +#endif -int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2, - int length2, uint16_t *a_out) { - int out_card = 0; - int k1 = 0, k2 = 0; - if (length1 == 0) return 0; - if (length2 == 0) { - if (a1 != a_out) memcpy(a_out, a1, sizeof(uint16_t) * length1); - return length1; - } - uint16_t s1 = a1[k1]; - uint16_t s2 = a2[k2]; - while (true) { - if (s1 < s2) { - a_out[out_card++] = s1; - ++k1; - if (k1 >= length1) { - break; - } - s1 = a1[k1]; - } else if (s1 == s2) { - ++k1; - ++k2; - if (k1 >= length1) { - break; - } - if (k2 >= length2) { - memmove(a_out + out_card, a1 + k1, - sizeof(uint16_t) * (length1 - k1)); - return out_card + length1 - k1; - } - s1 = a1[k1]; - s2 = a2[k2]; - } else { // if (val1>val2) - ++k2; - if (k2 >= length2) { - memmove(a_out + out_card, a1 + k1, - sizeof(uint16_t) * (length1 - k1)); - return out_card + length1 - k1; - } - s2 = a2[k2]; - } - } - return out_card; -} +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif -int32_t xor_uint16(const uint16_t *array_1, int32_t card_1, - const uint16_t *array_2, int32_t card_2, uint16_t *out) { - int32_t pos1 = 0, pos2 = 0, pos_out = 0; - while (pos1 < card_1 && pos2 < card_2) { - const uint16_t v1 = array_1[pos1]; - const uint16_t v2 = array_2[pos2]; - if (v1 == v2) { - ++pos1; - ++pos2; - continue; - } - if (v1 < v2) { - out[pos_out++] = v1; - ++pos1; - } else { - out[pos_out++] = v2; - ++pos2; - } - } - if (pos1 < card_1) { - const size_t n_elems = card_1 - pos1; - memcpy(out + pos_out, array_1 + pos1, n_elems * sizeof(uint16_t)); - pos_out += (int32_t)n_elems; - } else if (pos2 < card_2) { - const size_t n_elems = card_2 - pos2; - memcpy(out + pos_out, array_2 + pos2, n_elems * sizeof(uint16_t)); - pos_out += (int32_t)n_elems; - } - return pos_out; +/* + * Good old binary search. + * Assumes that array is sorted, has logarithmic complexity. 
+ * if the result is x, then: + * if ( x>0 ) you have array[x] = ikey + * if ( x<0 ) then inserting ikey at position -x-1 in array (insuring that array[-x-1]=ikey) + * keys the array sorted. + */ +inline int32_t binarySearch(const uint16_t *array, int32_t lenarray, +uint16_t ikey) { +int32_t low = 0; +int32_t high = lenarray - 1; +while (low <= high) { +int32_t middleIndex = (low + high) >> 1; +uint16_t middleValue = array[middleIndex]; +if (middleValue < ikey) { +low = middleIndex + 1; +} else if (middleValue > ikey) { +high = middleIndex - 1; +} else { +return middleIndex; +} +} +return -(low + 1); } -#ifdef USESSE4 - -/*** - * start of the SIMD 16-bit union code - * +/** + * Galloping search + * Assumes that array is sorted, has logarithmic complexity. + * if the result is x, then if x = length, you have that all values in array between pos and length + * are smaller than min. + * otherwise returns the first index x such that array[x] >= min. */ +static inline int32_t advanceUntil(const uint16_t *array, int32_t pos, +int32_t length, uint16_t min) { +int32_t lower = pos + 1; -// Assuming that vInput1 and vInput2 are sorted, produces a sorted output going -// from vecMin all the way to vecMax -// developed originally for merge sort using SIMD instructions. -// Standard merge. 
See, e.g., Inoue and Taura, SIMD- and Cache-Friendly -// Algorithm for Sorting an Array of Structures -static inline void sse_merge(const __m128i *vInput1, - const __m128i *vInput2, // input 1 & 2 - __m128i *vecMin, __m128i *vecMax) { // output - __m128i vecTmp; - vecTmp = _mm_min_epu16(*vInput1, *vInput2); - *vecMax = _mm_max_epu16(*vInput1, *vInput2); - vecTmp = _mm_alignr_epi8(vecTmp, vecTmp, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); - *vecMin = _mm_min_epu16(vecTmp, *vecMax); - *vecMax = _mm_max_epu16(vecTmp, *vecMax); - *vecMin = _mm_alignr_epi8(*vecMin, *vecMin, 2); +if ((lower >= length) || (array[lower] >= min)) { +return lower; } -// used by store_unique, generated by simdunion.py -static uint8_t uniqshuf[] = { - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, - 0xc, 0xd, 0xe, 0xf, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 
0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, - 
0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, - 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, - 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 
0x5, 0x8, 0x9, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xe, 0xf, - 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x6, 0x7, 0xe, 
0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xe, 0xf, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xe, 0xf, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, - 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 
0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x6, 0x7, 0x8, 
0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xc, 0xd, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xc, 0xd, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 
0x9, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, - 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, - 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, - 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, - 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 
- 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x2, 0x3, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0x0, 0x1, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF}; +int32_t spansize = 1; -// write vector new, while omitting repeated values assuming that previously -// written vector was "old" -static inline int store_unique(__m128i old, __m128i newval, uint16_t *output) { - __m128i vecTmp = _mm_alignr_epi8(newval, old, 16 - 2); - // lots of high latency instructions follow (optimize?) - int M = _mm_movemask_epi8( - _mm_packs_epi16(_mm_cmpeq_epi16(vecTmp, newval), _mm_setzero_si128())); - int numberofnewvalues = 8 - _mm_popcnt_u32(M); - __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M); - __m128i val = _mm_shuffle_epi8(newval, key); - _mm_storeu_si128((__m128i *)output, val); - return numberofnewvalues; +while ((lower + spansize < length) && (array[lower + spansize] < min)) { +spansize <<= 1; } +int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1; -// working in-place, this function overwrites the repeated values -// could be avoided? 
-static inline uint32_t unique(uint16_t *out, uint32_t len) { - uint32_t pos = 1; - for (uint32_t i = 1; i < len; ++i) { - if (out[i] != out[i - 1]) { - out[pos++] = out[i]; - } - } - return pos; +if (array[upper] == min) { +return upper; } - -// use with qsort, could be avoided -static int uint16_compare(const void *a, const void *b) { - return (*(uint16_t *)a - *(uint16_t *)b); +if (array[upper] < min) { +// means +// array +// has no +// item +// >= min +// pos = array.length; +return length; } -// a one-pass SSE union algorithm -// This function may not be safe if array1 == output or array2 == output. -uint32_t union_vector16(const uint16_t *__restrict__ array1, uint32_t length1, - const uint16_t *__restrict__ array2, uint32_t length2, - uint16_t *__restrict__ output) { - if ((length1 < 8) || (length2 < 8)) { - return (uint32_t)union_uint16(array1, length1, array2, length2, output); - } - __m128i vA, vB, V, vecMin, vecMax; - __m128i laststore; - uint16_t *initoutput = output; - uint32_t len1 = length1 / 8; - uint32_t len2 = length2 / 8; - uint32_t pos1 = 0; - uint32_t pos2 = 0; - // we start the machine - vA = _mm_lddqu_si128((const __m128i *)array1 + pos1); - pos1++; - vB = _mm_lddqu_si128((const __m128i *)array2 + pos2); - pos2++; - sse_merge(&vA, &vB, &vecMin, &vecMax); - laststore = _mm_set1_epi16(-1); - output += store_unique(laststore, vecMin, output); - laststore = vecMin; - if ((pos1 < len1) && (pos2 < len2)) { - uint16_t curA, curB; - curA = array1[8 * pos1]; - curB = array2[8 * pos2]; - while (true) { - if (curA <= curB) { - V = _mm_lddqu_si128((const __m128i *)array1 + pos1); - pos1++; - if (pos1 < len1) { - curA = array1[8 * pos1]; - } else { - break; - } - } else { - V = _mm_lddqu_si128((const __m128i *)array2 + pos2); - pos2++; - if (pos2 < len2) { - curB = array2[8 * pos2]; - } else { - break; - } - } - sse_merge(&V, &vecMax, &vecMin, &vecMax); - output += store_unique(laststore, vecMin, output); - laststore = vecMin; - } - sse_merge(&V, &vecMax, 
&vecMin, &vecMax); - output += store_unique(laststore, vecMin, output); - laststore = vecMin; - } - // we finish the rest off using a scalar algorithm - // could be improved? - // - // copy the small end on a tmp buffer - uint32_t len = (uint32_t)(output - initoutput); - uint16_t buffer[16]; - uint32_t leftoversize = store_unique(laststore, vecMax, buffer); - if (pos1 == len1) { - memcpy(buffer + leftoversize, array1 + 8 * pos1, - (length1 - 8 * len1) * sizeof(uint16_t)); - leftoversize += length1 - 8 * len1; - qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); - - leftoversize = unique(buffer, leftoversize); - len += (uint32_t)union_uint16(buffer, leftoversize, array2 + 8 * pos2, - length2 - 8 * pos2, output); - } else { - memcpy(buffer + leftoversize, array2 + 8 * pos2, - (length2 - 8 * len2) * sizeof(uint16_t)); - leftoversize += length2 - 8 * len2; - qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); - leftoversize = unique(buffer, leftoversize); - len += (uint32_t)union_uint16(buffer, leftoversize, array1 + 8 * pos1, - length1 - 8 * pos1, output); - } - return len; +// we know that the next-smallest span was too small +lower += (spansize >> 1); + +int32_t mid = 0; +while (lower + 1 != upper) { +mid = (lower + upper) >> 1; +if (array[mid] == min) { +return mid; +} else if (array[mid] < min) { +lower = mid; +} else { +upper = mid; +} +} +return upper; } /** - * End of the SIMD 16-bit union code - * + * Returns number of elements which are less than ikey. + * Array elements must be unique and sorted. */ +static inline int32_t count_less(const uint16_t *array, int32_t lenarray, +uint16_t ikey) { +if (lenarray == 0) return 0; +int32_t pos = binarySearch(array, lenarray, ikey); +return pos >= 0 ? pos : -(pos+1); +} /** - * Start of SIMD 16-bit XOR code + * Returns number of elements which are greater than ikey. + * Array elements must be unique and sorted. 
*/ - -// write vector new, while omitting repeated values assuming that previously -// written vector was "old" -static inline int store_unique_xor(__m128i old, __m128i newval, - uint16_t *output) { - __m128i vecTmp1 = _mm_alignr_epi8(newval, old, 16 - 4); - __m128i vecTmp2 = _mm_alignr_epi8(newval, old, 16 - 2); - __m128i equalleft = _mm_cmpeq_epi16(vecTmp2, vecTmp1); - __m128i equalright = _mm_cmpeq_epi16(vecTmp2, newval); - __m128i equalleftoright = _mm_or_si128(equalleft, equalright); - int M = _mm_movemask_epi8( - _mm_packs_epi16(equalleftoright, _mm_setzero_si128())); - int numberofnewvalues = 8 - _mm_popcnt_u32(M); - __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M); - __m128i val = _mm_shuffle_epi8(vecTmp2, key); - _mm_storeu_si128((__m128i *)output, val); - return numberofnewvalues; -} - -// working in-place, this function overwrites the repeated values -// could be avoided? Warning: assumes len > 0 -static inline uint32_t unique_xor(uint16_t *out, uint32_t len) { - uint32_t pos = 1; - for (uint32_t i = 1; i < len; ++i) { - if (out[i] != out[i - 1]) { - out[pos++] = out[i]; - } else - pos--; // if it is identical to previous, delete it - } - return pos; +static inline int32_t count_greater(const uint16_t *array, int32_t lenarray, +uint16_t ikey) { +if (lenarray == 0) return 0; +int32_t pos = binarySearch(array, lenarray, ikey); +if (pos >= 0) { +return lenarray - (pos+1); +} else { +return lenarray - (-pos-1); } - -// a one-pass SSE xor algorithm -uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1, - const uint16_t *__restrict__ array2, uint32_t length2, - uint16_t *__restrict__ output) { - if ((length1 < 8) || (length2 < 8)) { - return xor_uint16(array1, length1, array2, length2, output); - } - __m128i vA, vB, V, vecMin, vecMax; - __m128i laststore; - uint16_t *initoutput = output; - uint32_t len1 = length1 / 8; - uint32_t len2 = length2 / 8; - uint32_t pos1 = 0; - uint32_t pos2 = 0; - // we start the machine - vA = 
_mm_lddqu_si128((const __m128i *)array1 + pos1); - pos1++; - vB = _mm_lddqu_si128((const __m128i *)array2 + pos2); - pos2++; - sse_merge(&vA, &vB, &vecMin, &vecMax); - laststore = _mm_set1_epi16(-1); - uint16_t buffer[17]; - output += store_unique_xor(laststore, vecMin, output); - - laststore = vecMin; - if ((pos1 < len1) && (pos2 < len2)) { - uint16_t curA, curB; - curA = array1[8 * pos1]; - curB = array2[8 * pos2]; - while (true) { - if (curA <= curB) { - V = _mm_lddqu_si128((const __m128i *)array1 + pos1); - pos1++; - if (pos1 < len1) { - curA = array1[8 * pos1]; - } else { - break; - } - } else { - V = _mm_lddqu_si128((const __m128i *)array2 + pos2); - pos2++; - if (pos2 < len2) { - curB = array2[8 * pos2]; - } else { - break; - } - } - sse_merge(&V, &vecMax, &vecMin, &vecMax); - // conditionally stores the last value of laststore as well as all - // but the - // last value of vecMin - output += store_unique_xor(laststore, vecMin, output); - laststore = vecMin; - } - sse_merge(&V, &vecMax, &vecMin, &vecMax); - // conditionally stores the last value of laststore as well as all but - // the - // last value of vecMin - output += store_unique_xor(laststore, vecMin, output); - laststore = vecMin; - } - uint32_t len = (uint32_t)(output - initoutput); - - // we finish the rest off using a scalar algorithm - // could be improved? 
- // conditionally stores the last value of laststore as well as all but the - // last value of vecMax, - // we store to "buffer" - int leftoversize = store_unique_xor(laststore, vecMax, buffer); - uint16_t vec7 = _mm_extract_epi16(vecMax, 7); - uint16_t vec6 = _mm_extract_epi16(vecMax, 6); - if (vec7 != vec6) buffer[leftoversize++] = vec7; - if (pos1 == len1) { - memcpy(buffer + leftoversize, array1 + 8 * pos1, - (length1 - 8 * len1) * sizeof(uint16_t)); - leftoversize += length1 - 8 * len1; - if (leftoversize == 0) { // trivial case - memcpy(output, array2 + 8 * pos2, - (length2 - 8 * pos2) * sizeof(uint16_t)); - len += (length2 - 8 * pos2); - } else { - qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); - leftoversize = unique_xor(buffer, leftoversize); - len += xor_uint16(buffer, leftoversize, array2 + 8 * pos2, - length2 - 8 * pos2, output); - } - } else { - memcpy(buffer + leftoversize, array2 + 8 * pos2, - (length2 - 8 * len2) * sizeof(uint16_t)); - leftoversize += length2 - 8 * len2; - if (leftoversize == 0) { // trivial case - memcpy(output, array1 + 8 * pos1, - (length1 - 8 * pos1) * sizeof(uint16_t)); - len += (length1 - 8 * pos1); - } else { - qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); - leftoversize = unique_xor(buffer, leftoversize); - len += xor_uint16(buffer, leftoversize, array1 + 8 * pos1, - length1 - 8 * pos1, output); - } - } - return len; } /** - * End of SIMD 16-bit XOR code + * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions + * Optimized by D. Lemire on May 3rd 2013 + * + * C should have capacity greater than the minimum of s_1 and s_b + 8 + * where 8 is sizeof(__m128i)/sizeof(uint16_t). 
*/ +int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, +const uint16_t *__restrict__ B, size_t s_b, +uint16_t *C); -#endif // USESSE4 - -size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2, - size_t size_2, uint32_t *buffer) { - size_t pos = 0, idx_1 = 0, idx_2 = 0; +int32_t intersect_vector16_inplace(uint16_t *__restrict__ A, size_t s_a, +const uint16_t *__restrict__ B, size_t s_b); - if (0 == size_2) { - memmove(buffer, set_1, size_1 * sizeof(uint32_t)); - return size_1; - } - if (0 == size_1) { - memmove(buffer, set_2, size_2 * sizeof(uint32_t)); - return size_2; - } +/** + * Take an array container and write it out to a 32-bit array, using base + * as the offset. + */ +int array_container_to_uint32_array_vector16(void *vout, const uint16_t* array, size_t cardinality, +uint32_t base); +#if CROARING_COMPILER_SUPPORTS_AVX512 +int avx512_array_container_to_uint32_array(void *vout, const uint16_t* array, size_t cardinality, +uint32_t base); +#endif +/** + * Compute the cardinality of the intersection using SSE4 instructions + */ +int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, +size_t s_a, +const uint16_t *__restrict__ B, +size_t s_b); - uint32_t val_1 = set_1[idx_1], val_2 = set_2[idx_2]; - - while (true) { - if (val_1 < val_2) { - buffer[pos++] = val_1; - ++idx_1; - if (idx_1 >= size_1) break; - val_1 = set_1[idx_1]; - } else if (val_2 < val_1) { - buffer[pos++] = val_2; - ++idx_2; - if (idx_2 >= size_2) break; - val_2 = set_2[idx_2]; - } else { - buffer[pos++] = val_1; - ++idx_1; - ++idx_2; - if (idx_1 >= size_1 || idx_2 >= size_2) break; - val_1 = set_1[idx_1]; - val_2 = set_2[idx_2]; - } - } +/* Computes the intersection between one small and one large set of uint16_t. + * Stores the result into buffer and return the number of elements. 
*/ +int32_t intersect_skewed_uint16(const uint16_t *smallarray, size_t size_s, +const uint16_t *largearray, size_t size_l, +uint16_t *buffer); - if (idx_1 < size_1) { - const size_t n_elems = size_1 - idx_1; - memmove(buffer + pos, set_1 + idx_1, n_elems * sizeof(uint32_t)); - pos += n_elems; - } else if (idx_2 < size_2) { - const size_t n_elems = size_2 - idx_2; - memmove(buffer + pos, set_2 + idx_2, n_elems * sizeof(uint32_t)); - pos += n_elems; - } +/* Computes the size of the intersection between one small and one large set of + * uint16_t. */ +int32_t intersect_skewed_uint16_cardinality(const uint16_t *smallarray, +size_t size_s, +const uint16_t *largearray, +size_t size_l); - return pos; -} -size_t union_uint32_card(const uint32_t *set_1, size_t size_1, - const uint32_t *set_2, size_t size_2) { - size_t pos = 0, idx_1 = 0, idx_2 = 0; +/* Check whether the size of the intersection between one small and one large set of uint16_t is non-zero. */ +bool intersect_skewed_uint16_nonempty(const uint16_t *smallarray, size_t size_s, +const uint16_t *largearray, size_t size_l); +/** + * Generic intersection function. + */ +int32_t intersect_uint16(const uint16_t *A, const size_t lenA, +const uint16_t *B, const size_t lenB, uint16_t *out); +/** + * Compute the size of the intersection (generic). + */ +int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA, +const uint16_t *B, const size_t lenB); - if (0 == size_2) { - return size_1; - } - if (0 == size_1) { - return size_2; - } +/** + * Checking whether the size of the intersection is non-zero. + */ +bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA, +const uint16_t *B, const size_t lenB); +/** + * Generic union function. 
+ */ +size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, +size_t size_2, uint16_t *buffer); - uint32_t val_1 = set_1[idx_1], val_2 = set_2[idx_2]; - - while (true) { - if (val_1 < val_2) { - ++idx_1; - ++pos; - if (idx_1 >= size_1) break; - val_1 = set_1[idx_1]; - } else if (val_2 < val_1) { - ++idx_2; - ++pos; - if (idx_2 >= size_2) break; - val_2 = set_2[idx_2]; - } else { - ++idx_1; - ++idx_2; - ++pos; - if (idx_1 >= size_1 || idx_2 >= size_2) break; - val_1 = set_1[idx_1]; - val_2 = set_2[idx_2]; - } - } +/** + * Generic XOR function. + */ +int32_t xor_uint16(const uint16_t *array_1, int32_t card_1, +const uint16_t *array_2, int32_t card_2, uint16_t *out); - if (idx_1 < size_1) { - const size_t n_elems = size_1 - idx_1; - pos += n_elems; - } else if (idx_2 < size_2) { - const size_t n_elems = size_2 - idx_2; - pos += n_elems; - } - return pos; -} +/** + * Generic difference function (ANDNOT). + */ +int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2, +int length2, uint16_t *a_out); + +/** + * Generic intersection function. + */ +size_t intersection_uint32(const uint32_t *A, const size_t lenA, +const uint32_t *B, const size_t lenB, uint32_t *out); + +/** + * Generic intersection function, returns just the cardinality. + */ +size_t intersection_uint32_card(const uint32_t *A, const size_t lenA, +const uint32_t *B, const size_t lenB); + +/** + * Generic union function. + */ +size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2, +size_t size_2, uint32_t *buffer); + +/** + * A fast SSE-based union function. + */ +uint32_t union_vector16(const uint16_t *__restrict__ set_1, uint32_t size_1, +const uint16_t *__restrict__ set_2, uint32_t size_2, +uint16_t *__restrict__ buffer); +/** + * A fast SSE-based XOR function. 
+ */ +uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1, +const uint16_t *__restrict__ array2, uint32_t length2, +uint16_t *__restrict__ output); +/** + * A fast SSE-based difference function. + */ +int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, +const uint16_t *__restrict__ B, size_t s_b, +uint16_t *C); +/** + * Generic union function, returns just the cardinality. + */ +size_t union_uint32_card(const uint32_t *set_1, size_t size_1, +const uint32_t *set_2, size_t size_2); +/** +* combines union_uint16 and union_vector16 optimally +*/ size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, - size_t size_2, uint16_t *buffer) { -#ifdef ROARING_VECTOR_OPERATIONS_ENABLED - // compute union with smallest array first - if (size_1 < size_2) { - return union_vector16(set_1, (uint32_t)size_1, - set_2, (uint32_t)size_2, buffer); - } else { - return union_vector16(set_2, (uint32_t)size_2, - set_1, (uint32_t)size_1, buffer); - } -#else - // compute union with smallest array first - if (size_1 < size_2) { - return union_uint16( - set_1, size_1, set_2, size_2, buffer); - } else { - return union_uint16( - set_2, size_2, set_1, size_1, buffer); - } +size_t size_2, uint16_t *buffer); + + +bool memequals(const void *s1, const void *s2, size_t n); + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { #endif -} -bool memequals(const void *s1, const void *s2, size_t n) { - if (n == 0) { - return true; - } -#ifdef USEAVX - const uint8_t *ptr1 = (const uint8_t *)s1; - const uint8_t *ptr2 = (const uint8_t *)s2; - const uint8_t *end1 = ptr1 + n; - const uint8_t *end8 = ptr1 + n/8*8; - const uint8_t *end32 = ptr1 + n/32*32; - - while (ptr1 < end32) { - __m256i r1 = _mm256_loadu_si256((const __m256i*)ptr1); - __m256i r2 = _mm256_loadu_si256((const __m256i*)ptr2); - int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2)); - if ((uint32_t)mask != UINT32_MAX) { - return false; - } 
- ptr1 += 32; - ptr2 += 32; - } +#endif +/* end file include/roaring/array_util.h */ +/* begin file include/roaring/utilasm.h */ +/* + * utilasm.h + * + */ - while (ptr1 < end8) { - uint64_t v1 = *((const uint64_t*)ptr1); - uint64_t v2 = *((const uint64_t*)ptr2); - if (v1 != v2) { - return false; - } - ptr1 += 8; - ptr2 += 8; - } +#ifndef INCLUDE_UTILASM_H_ +#define INCLUDE_UTILASM_H_ - while (ptr1 < end1) { - if (*ptr1 != *ptr2) { - return false; - } - ptr1++; - ptr2++; - } - return true; -#else - return memcmp(s1, s2, n) == 0; +#ifdef __cplusplus +extern "C" { namespace roaring { #endif -} -/* end file src/array_util.c */ -/* begin file src/bitset_util.c */ -#include -#include -#include -#include -#include +#if defined(CROARING_INLINE_ASM) +#define CROARING_ASMBITMANIPOPTIMIZATION // optimization flag + +#define ASM_SHIFT_RIGHT(srcReg, bitsReg, destReg) \ + __asm volatile("shrx %1, %2, %0" \ + : "=r"(destReg) \ + : /* write */ \ + "r"(bitsReg), /* read only */ \ + "r"(srcReg) /* read only */ \ + ) + +#define ASM_INPLACESHIFT_RIGHT(srcReg, bitsReg) \ + __asm volatile("shrx %1, %0, %0" \ + : "+r"(srcReg) \ + : /* read/write */ \ + "r"(bitsReg) /* read only */ \ + ) + +#define ASM_SHIFT_LEFT(srcReg, bitsReg, destReg) \ + __asm volatile("shlx %1, %2, %0" \ + : "=r"(destReg) \ + : /* write */ \ + "r"(bitsReg), /* read only */ \ + "r"(srcReg) /* read only */ \ + ) +// set bit at position testBit within testByte to 1 and +// copy cmovDst to cmovSrc if that bit was previously clear +#define ASM_SET_BIT_INC_WAS_CLEAR(testByte, testBit, count) \ + __asm volatile( \ + "bts %2, %0\n" \ + "sbb $-1, %1\n" \ + : "+r"(testByte), /* read/write */ \ + "+r"(count) \ + : /* read/write */ \ + "r"(testBit) /* read only */ \ + ) + +#define ASM_CLEAR_BIT_DEC_WAS_SET(testByte, testBit, count) \ + __asm volatile( \ + "btr %2, %0\n" \ + "sbb $0, %1\n" \ + : "+r"(testByte), /* read/write */ \ + "+r"(count) \ + : /* read/write */ \ + "r"(testBit) /* read only */ \ + ) + +#define 
ASM_BT64(testByte, testBit, count) \ + __asm volatile( \ + "bt %2,%1\n" \ + "sbb %0,%0" /*could use setb */ \ + : "=r"(count) \ + : /* write */ \ + "r"(testByte), /* read only */ \ + "r"(testBit) /* read only */ \ + ) -#ifdef IS_X64 -static uint8_t lengthTable[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, - 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, - 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, - 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, - 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, - 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; -#endif - -#ifdef USEAVX -ALIGNED(32) -static uint32_t vecDecodeTable[256][8] = { - {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ - {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ - {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ - {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ - {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ - {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ - {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ - {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ - {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ - {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ - {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ - {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ - {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ - {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ - {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ - {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ - {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ - {1, 
5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ - {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ - {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ - {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ - {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ - {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ - {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ - {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ - {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ - {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ - {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ - {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ - {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ - {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ - {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ - {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ - {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ - {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ - {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ - {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ - {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ - {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ - {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ - {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ - {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ - {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ - {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ - {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ - {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ - {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ - {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ - {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ - {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ - {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ - {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ - {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ - {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ - {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ - {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ - {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ - {1, 
4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ - {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ - {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ - {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ - {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ - {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ - {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ - {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ - {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ - {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ - {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ - {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ - {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ - {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ - {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ - {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ - {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ - {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ - {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ - {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ - {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ - {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ - {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ - {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ - {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ - {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ - {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ - {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ - {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ - {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ - {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ - {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ - {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ - {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ - {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ - {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ - {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ - {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ - {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ - {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ - {1, 
6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ - {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ - {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ - {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ - {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ - {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ - {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ - {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ - {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ - {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ - {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ - {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ - {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ - {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ - {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ - {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ - {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ - {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ - {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ - {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ - {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ - {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ - {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ - {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ - {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ - {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ - {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ - {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ - {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ - {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ - {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ - {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ - {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ - {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ - {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ - {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ - {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ - {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ - {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ - {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ - {1, 
4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ - {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ - {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ - {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ - {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ - {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ - {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ - {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ - {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ - {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ - {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ - {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ - {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ - {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ - {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ - {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ - {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ - {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ - {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ - {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ - {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ - {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ - {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ - {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ - {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ - {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ - {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ - {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ - {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ - {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ - {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ - {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ - {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ - {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ - {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ - {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ - {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ - {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ - {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ - {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ - {1, 
5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ - {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ - {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ - {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ - {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ - {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ - {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ - {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ - {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ - {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ - {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ - {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ - {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ - {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ - {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ - {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ - {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ - {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ - {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ - {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ - {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ - {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ - {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ - {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ - {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ - {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ - {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ - {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ - {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ - {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ - {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ - {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ - {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ - {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ - {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ - {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ - {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ - {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ - {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ - {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ - {1, 
4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ - {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ - {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ - {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ - {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ - {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ - {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ - {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ - {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ - {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ - {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ - {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ - {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ - {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ - {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ - {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ - {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ - {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ - {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ - {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ - {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ - {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ - {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ - {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ - {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ - {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ - {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ - {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ - {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ - {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ - {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ - {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ - {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ - {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ - {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ - {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ - {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ - {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ - {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ -}; +#endif -#endif // #ifdef USEAVX +#ifdef __cplusplus 
+} } // extern "C" { namespace roaring { +#endif -#ifdef IS_X64 -// same as vecDecodeTable but in 16 bits -ALIGNED(32) -static uint16_t vecDecodeTable_uint16[256][8] = { - {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ - {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ - {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ - {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ - {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ - {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ - {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ - {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ - {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ - {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ - {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ - {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ - {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ - {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ - {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ - {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ - {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ - {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ - {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ - {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ - {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ - {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ - {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ - {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ - {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ - {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ - {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ - {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ - {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ - {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ - {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ - {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ - {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ - {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ - {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ - {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ - {3, 6, 0, 0, 0, 0, 0, 0}, 
/* 0x24 (00100100) */ - {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ - {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ - {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ - {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ - {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ - {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ - {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ - {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ - {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ - {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ - {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ - {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ - {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ - {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ - {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ - {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ - {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ - {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ - {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ - {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ - {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ - {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ - {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ - {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ - {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ - {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ - {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ - {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ - {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ - {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ - {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ - {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ - {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ - {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ - {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ - {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ - {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ - {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ - {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ - {3, 4, 7, 0, 0, 0, 0, 0}, 
/* 0x4C (01001100) */ - {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ - {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ - {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ - {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ - {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ - {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ - {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ - {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ - {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ - {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ - {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ - {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ - {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ - {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ - {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ - {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ - {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ - {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ - {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ - {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ - {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ - {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ - {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ - {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ - {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ - {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ - {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ - {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ - {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ - {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ - {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ - {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ - {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ - {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ - {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ - {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ - {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ - {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ - {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ - {3, 5, 6, 7, 0, 0, 0, 0}, 
/* 0x74 (01110100) */ - {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ - {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ - {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ - {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ - {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ - {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ - {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ - {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ - {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ - {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ - {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ - {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ - {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ - {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ - {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ - {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ - {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ - {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ - {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ - {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ - {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ - {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ - {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ - {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ - {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ - {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ - {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ - {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ - {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ - {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ - {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ - {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ - {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ - {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ - {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ - {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ - {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ - {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ - {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ - {3, 4, 5, 8, 0, 0, 0, 0}, 
/* 0x9C (10011100) */ - {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ - {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ - {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ - {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ - {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ - {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ - {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ - {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ - {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ - {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ - {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ - {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ - {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ - {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ - {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ - {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ - {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ - {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ - {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ - {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ - {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ - {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ - {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ - {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ - {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ - {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ - {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ - {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ - {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ - {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ - {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ - {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ - {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ - {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ - {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ - {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ - {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ - {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ - {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ - {3, 7, 8, 0, 0, 0, 0, 0}, 
/* 0xC4 (11000100) */ - {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ - {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ - {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ - {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ - {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ - {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ - {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ - {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ - {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ - {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ - {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ - {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ - {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ - {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ - {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ - {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ - {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ - {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ - {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ - {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ - {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ - {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ - {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ - {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ - {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ - {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ - {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ - {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ - {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ - {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ - {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ - {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ - {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ - {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ - {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ - {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ - {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ - {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ - {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ - {3, 4, 6, 7, 8, 0, 0, 0}, 
/* 0xEC (11101100) */ - {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ - {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ - {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ - {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ - {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ - {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ - {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ - {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ - {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ - {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ - {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ - {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ - {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ - {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ - {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ - {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ - {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ - {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ - {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ -}; +#endif /* INCLUDE_UTILASM_H_ */ +/* end file include/roaring/utilasm.h */ +/* begin file include/roaring/bitset_util.h */ +#ifndef BITSET_UTIL_H +#define BITSET_UTIL_H + +#include + +#if CROARING_IS_X64 +#ifndef CROARING_COMPILER_SUPPORTS_AVX512 +#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." 
+#endif // CROARING_COMPILER_SUPPORTS_AVX512 #endif -#ifdef USEAVX - -size_t bitset_extract_setbits_avx2(uint64_t *array, size_t length, void *vout, - size_t outcapacity, uint32_t base) { - uint32_t *out = (uint32_t *)vout; - uint32_t *initout = out; - __m256i baseVec = _mm256_set1_epi32(base - 1); - __m256i incVec = _mm256_set1_epi32(64); - __m256i add8 = _mm256_set1_epi32(8); - uint32_t *safeout = out + outcapacity; - size_t i = 0; - for (; (i < length) && (out + 64 <= safeout); ++i) { - uint64_t w = array[i]; - if (w == 0) { - baseVec = _mm256_add_epi32(baseVec, incVec); - } else { - for (int k = 0; k < 4; ++k) { - uint8_t byteA = (uint8_t)w; - uint8_t byteB = (uint8_t)(w >> 8); - w >>= 16; - __m256i vecA = - _mm256_load_si256((const __m256i *)vecDecodeTable[byteA]); - __m256i vecB = - _mm256_load_si256((const __m256i *)vecDecodeTable[byteB]); - uint8_t advanceA = lengthTable[byteA]; - uint8_t advanceB = lengthTable[byteB]; - vecA = _mm256_add_epi32(baseVec, vecA); - baseVec = _mm256_add_epi32(baseVec, add8); - vecB = _mm256_add_epi32(baseVec, vecB); - baseVec = _mm256_add_epi32(baseVec, add8); - _mm256_storeu_si256((__m256i *)out, vecA); - out += advanceA; - _mm256_storeu_si256((__m256i *)out, vecB); - out += advanceB; - } - } - } - base += i * 64; - for (; (i < length) && (out < safeout); ++i) { - uint64_t w = array[i]; - while ((w != 0) && (out < safeout)) { - uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) - int r = __builtin_ctzll(w); // on x64, should compile to TZCNT - uint32_t val = r + base; - memcpy(out, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - out++; - w ^= t; - } - base += 64; - } - return out - initout; -} -#endif // USEAVX - -size_t bitset_extract_setbits(uint64_t *bitset, size_t length, void *vout, - uint32_t base) { - int outpos = 0; - uint32_t *out = (uint32_t *)vout; - for (size_t i = 0; i < length; ++i) { - uint64_t w = bitset[i]; - while (w != 0) { - uint64_t t 
= w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) - int r = __builtin_ctzll(w); // on x64, should compile to TZCNT - uint32_t val = r + base; - memcpy(out + outpos, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - outpos++; - w ^= t; - } - base += 64; - } - return outpos; -} - -size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ bitset1, - const uint64_t * __restrict__ bitset2, - size_t length, uint16_t *out, - uint16_t base) { - int outpos = 0; - for (size_t i = 0; i < length; ++i) { - uint64_t w = bitset1[i] & bitset2[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - out[outpos++] = r + base; - w ^= t; - } - base += 64; - } - return outpos; +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +/* + * Set all bits in indexes [begin,end) to true. + */ +static inline void bitset_set_range(uint64_t *words, uint32_t start, +uint32_t end) { +if (start == end) return; +uint32_t firstword = start / 64; +uint32_t endword = (end - 1) / 64; +if (firstword == endword) { +words[firstword] |= ((~UINT64_C(0)) << (start % 64)) & +((~UINT64_C(0)) >> ((~end + 1) % 64)); +return; +} +words[firstword] |= (~UINT64_C(0)) << (start % 64); +for (uint32_t i = firstword + 1; i < endword; i++) { +words[i] = ~UINT64_C(0); +} +words[endword] |= (~UINT64_C(0)) >> ((~end + 1) % 64); +} + + +/* + * Find the cardinality of the bitset in [begin,begin+lenminusone] + */ +static inline int bitset_lenrange_cardinality(const uint64_t *words, +uint32_t start, +uint32_t lenminusone) { +uint32_t firstword = start / 64; +uint32_t endword = (start + lenminusone) / 64; +if (firstword == endword) { +return roaring_hamming(words[firstword] & +((~UINT64_C(0)) >> ((63 - lenminusone) % 64)) +<< (start % 64)); +} +int answer = roaring_hamming(words[firstword] & ((~UINT64_C(0)) << (start % 64))); +for (uint32_t i = firstword + 1; i < endword; i++) { +answer += 
roaring_hamming(words[i]); +} +answer += +roaring_hamming(words[endword] & +(~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64)); +return answer; +} + +/* + * Check whether the cardinality of the bitset in [begin,begin+lenminusone] is 0 + */ +static inline bool bitset_lenrange_empty(const uint64_t *words, uint32_t start, +uint32_t lenminusone) { +uint32_t firstword = start / 64; +uint32_t endword = (start + lenminusone) / 64; +if (firstword == endword) { +return (words[firstword] & ((~UINT64_C(0)) >> ((63 - lenminusone) % 64)) +<< (start % 64)) == 0; +} +if (((words[firstword] & ((~UINT64_C(0)) << (start%64)))) != 0) { +return false; +} +for (uint32_t i = firstword + 1; i < endword; i++) { +if (words[i] != 0) { +return false; +} +} +if ((words[endword] & (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64)) != 0) { +return false; +} +return true; +} + + +/* + * Set all bits in indexes [begin,begin+lenminusone] to true. + */ +static inline void bitset_set_lenrange(uint64_t *words, uint32_t start, +uint32_t lenminusone) { +uint32_t firstword = start / 64; +uint32_t endword = (start + lenminusone) / 64; +if (firstword == endword) { +words[firstword] |= ((~UINT64_C(0)) >> ((63 - lenminusone) % 64)) +<< (start % 64); +return; +} +uint64_t temp = words[endword]; +words[firstword] |= (~UINT64_C(0)) << (start % 64); +for (uint32_t i = firstword + 1; i < endword; i += 2) +words[i] = words[i + 1] = ~UINT64_C(0); +words[endword] = +temp | (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64); +} + +/* + * Flip all the bits in indexes [begin,end). 
+ */ +static inline void bitset_flip_range(uint64_t *words, uint32_t start, +uint32_t end) { +if (start == end) return; +uint32_t firstword = start / 64; +uint32_t endword = (end - 1) / 64; +words[firstword] ^= ~((~UINT64_C(0)) << (start % 64)); +for (uint32_t i = firstword; i < endword; i++) { +words[i] = ~words[i]; +} +words[endword] ^= ((~UINT64_C(0)) >> ((~end + 1) % 64)); +} + +/* + * Set all bits in indexes [begin,end) to false. + */ +static inline void bitset_reset_range(uint64_t *words, uint32_t start, +uint32_t end) { +if (start == end) return; +uint32_t firstword = start / 64; +uint32_t endword = (end - 1) / 64; +if (firstword == endword) { +words[firstword] &= ~(((~UINT64_C(0)) << (start % 64)) & +((~UINT64_C(0)) >> ((~end + 1) % 64))); +return; +} +words[firstword] &= ~((~UINT64_C(0)) << (start % 64)); +for (uint32_t i = firstword + 1; i < endword; i++) { +words[i] = UINT64_C(0); } +words[endword] &= ~((~UINT64_C(0)) >> ((~end + 1) % 64)); +} + +/* + * Given a bitset containing "length" 64-bit words, write out the position + * of all the set bits to "out", values start at "base". + * + * The "out" pointer should be sufficient to store the actual number of bits + * set. + * + * Returns how many values were actually decoded. + * + * This function should only be expected to be faster than + * bitset_extract_setbits + * when the density of the bitset is high. + * + * This function uses AVX2 decoding. + */ +size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length, +uint32_t *out, size_t outcapacity, +uint32_t base); + +size_t bitset_extract_setbits_avx512(const uint64_t *words, size_t length, +uint32_t *out, size_t outcapacity, +uint32_t base); +/* + * Given a bitset containing "length" 64-bit words, write out the position + * of all the set bits to "out", values start at "base". + * + * The "out" pointer should be sufficient to store the actual number of bits + *set. + * + * Returns how many values were actually decoded. 
+ */ +size_t bitset_extract_setbits(const uint64_t *words, size_t length, +uint32_t *out, uint32_t base); -#ifdef IS_X64 /* * Given a bitset containing "length" 64-bit words, write out the position * of all the set bits to "out" as 16-bit integers, values start at "base" (can - *be set to zero). + *be set to zero) * * The "out" pointer should be sufficient to store the actual number of bits *set. * * Returns how many values were actually decoded. * + * This function should only be expected to be faster than + *bitset_extract_setbits_uint16 + * when the density of the bitset is high. + * * This function uses SSE decoding. */ -size_t bitset_extract_setbits_sse_uint16(const uint64_t *bitset, size_t length, - uint16_t *out, size_t outcapacity, - uint16_t base) { - uint16_t *initout = out; - __m128i baseVec = _mm_set1_epi16(base - 1); - __m128i incVec = _mm_set1_epi16(64); - __m128i add8 = _mm_set1_epi16(8); - uint16_t *safeout = out + outcapacity; - const int numberofbytes = 2; // process two bytes at a time - size_t i = 0; - for (; (i < length) && (out + numberofbytes * 8 <= safeout); ++i) { - uint64_t w = bitset[i]; - if (w == 0) { - baseVec = _mm_add_epi16(baseVec, incVec); - } else { - for (int k = 0; k < 4; ++k) { - uint8_t byteA = (uint8_t)w; - uint8_t byteB = (uint8_t)(w >> 8); - w >>= 16; - __m128i vecA = _mm_load_si128( - (const __m128i *)vecDecodeTable_uint16[byteA]); - __m128i vecB = _mm_load_si128( - (const __m128i *)vecDecodeTable_uint16[byteB]); - uint8_t advanceA = lengthTable[byteA]; - uint8_t advanceB = lengthTable[byteB]; - vecA = _mm_add_epi16(baseVec, vecA); - baseVec = _mm_add_epi16(baseVec, add8); - vecB = _mm_add_epi16(baseVec, vecB); - baseVec = _mm_add_epi16(baseVec, add8); - _mm_storeu_si128((__m128i *)out, vecA); - out += advanceA; - _mm_storeu_si128((__m128i *)out, vecB); - out += advanceB; - } - } - } - base += (uint16_t)(i * 64); - for (; (i < length) && (out < safeout); ++i) { - uint64_t w = bitset[i]; - while ((w != 0) && (out < 
safeout)) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - *out = r + base; - out++; - w ^= t; - } - base += 64; - } - return out - initout; -} -#endif +size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length, +uint16_t *out, size_t outcapacity, +uint16_t base); + +size_t bitset_extract_setbits_avx512_uint16(const uint64_t *words, size_t length, +uint16_t *out, size_t outcapacity, +uint16_t base); /* * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out", values start at "base" (can be set to zero). + * of all the set bits to "out", values start at "base" + * (can be set to zero) * * The "out" pointer should be sufficient to store the actual number of bits *set. * * Returns how many values were actually decoded. */ -size_t bitset_extract_setbits_uint16(const uint64_t *bitset, size_t length, - uint16_t *out, uint16_t base) { - int outpos = 0; - for (size_t i = 0; i < length; ++i) { - uint64_t w = bitset[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - out[outpos++] = r + base; - w ^= t; - } - base += 64; - } - return outpos; -} +size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length, +uint16_t *out, uint16_t base); + +/* + * Given two bitsets containing "length" 64-bit words, write out the position + * of all the common set bits to "out", values start at "base" + * (can be set to zero) + * + * The "out" pointer should be sufficient to store the actual number of bits + * set. + * + * Returns how many values were actually decoded. + */ +size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ words1, +const uint64_t * __restrict__ words2, +size_t length, uint16_t *out, +uint16_t base); -#if defined(ASMBITMANIPOPTIMIZATION) +/* + * Given a bitset having cardinality card, set all bit values in the list (there + * are length of them) + * and return the updated cardinality. 
This evidently assumes that the bitset + * already contained data. + */ +uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, +const uint16_t *list, uint64_t length); +/* + * Given a bitset, set all bit values in the list (there + * are length of them). + */ +void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length); -uint64_t bitset_set_list_withcard(void *bitset, uint64_t card, - const uint16_t *list, uint64_t length) { - uint64_t offset, load, pos; - uint64_t shift = 6; - const uint16_t *end = list + length; - if (!length) return card; - // TODO: could unroll for performance, see bitset_set_list - // bts is not available as an intrinsic in GCC - __asm volatile( - "1:\n" - "movzwq (%[list]), %[pos]\n" - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)\n" - "sbb $-1, %[card]\n" - "add $2, %[list]\n" - "cmp %[list], %[end]\n" - "jnz 1b" - : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), - [pos] "=&r"(pos), [offset] "=&r"(offset) - : [end] "r"(end), [bitset] "r"(bitset), [shift] "r"(shift)); - return card; -} - -void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length) { - uint64_t pos; - const uint16_t *end = list + length; +/* + * Given a bitset having cardinality card, unset all bit values in the list + * (there are length of them) + * and return the updated cardinality. This evidently assumes that the bitset + * already contained data. 
+ */ +uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, +uint64_t length); - uint64_t shift = 6; - uint64_t offset; - uint64_t load; - for (; list + 3 < end; list += 4) { - pos = list[0]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[1]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[2]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); - pos = list[3]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); - } +/* + * Given a bitset having cardinality card, toggle all bit values in the list + * (there are length of them) + * and return the updated cardinality. This evidently assumes that the bitset + * already contained data. 
+ */ - while (list != end) { - pos = list[0]; - __asm volatile( - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "bts %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)" - : [load] "=&r"(load), [offset] "=&r"(offset) - : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); - list++; - } -} +uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card, +const uint16_t *list, uint64_t length); -uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list, - uint64_t length) { - uint64_t offset, load, pos; - uint64_t shift = 6; - const uint16_t *end = list + length; - if (!length) return card; - // btr is not available as an intrinsic in GCC - __asm volatile( - "1:\n" - "movzwq (%[list]), %[pos]\n" - "shrx %[shift], %[pos], %[offset]\n" - "mov (%[bitset],%[offset],8), %[load]\n" - "btr %[pos], %[load]\n" - "mov %[load], (%[bitset],%[offset],8)\n" - "sbb $0, %[card]\n" - "add $2, %[list]\n" - "cmp %[list], %[end]\n" - "jnz 1b" - : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), - [pos] "=&r"(pos), [offset] "=&r"(offset) - : [end] "r"(end), [bitset] "r"(bitset), [shift] "r"(shift) - : - /* clobbers */ "memory"); - return card; -} +void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length); -#else -uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list, - uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = ((uint64_t *)bitset)[offset]; - newload = load & ~(UINT64_C(1) << index); - card -= (load ^ newload) >> index; - ((uint64_t *)bitset)[offset] = newload; - list++; - } - return card; -} - -uint64_t bitset_set_list_withcard(void *bitset, uint64_t card, - const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != 
end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = ((uint64_t *)bitset)[offset]; - newload = load | (UINT64_C(1) << index); - card += (load ^ newload) >> index; - ((uint64_t *)bitset)[offset] = newload; - list++; - } - return card; -} - -void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = ((uint64_t *)bitset)[offset]; - newload = load | (UINT64_C(1) << index); - ((uint64_t *)bitset)[offset] = newload; - list++; - } +#if CROARING_IS_X64 +/*** + * BEGIN Harley-Seal popcount functions. + */ +CROARING_TARGET_AVX2 +/** + * Compute the population count of a 256-bit word + * This is not especially fast, but it is convenient as part of other functions. + */ +static inline __m256i popcount256(__m256i v) { +const __m256i lookuppos = _mm256_setr_epi8( +/* 0 */ 4 + 0, /* 1 */ 4 + 1, /* 2 */ 4 + 1, /* 3 */ 4 + 2, +/* 4 */ 4 + 1, /* 5 */ 4 + 2, /* 6 */ 4 + 2, /* 7 */ 4 + 3, +/* 8 */ 4 + 1, /* 9 */ 4 + 2, /* a */ 4 + 2, /* b */ 4 + 3, +/* c */ 4 + 2, /* d */ 4 + 3, /* e */ 4 + 3, /* f */ 4 + 4, + +/* 0 */ 4 + 0, /* 1 */ 4 + 1, /* 2 */ 4 + 1, /* 3 */ 4 + 2, +/* 4 */ 4 + 1, /* 5 */ 4 + 2, /* 6 */ 4 + 2, /* 7 */ 4 + 3, +/* 8 */ 4 + 1, /* 9 */ 4 + 2, /* a */ 4 + 2, /* b */ 4 + 3, +/* c */ 4 + 2, /* d */ 4 + 3, /* e */ 4 + 3, /* f */ 4 + 4); +const __m256i lookupneg = _mm256_setr_epi8( +/* 0 */ 4 - 0, /* 1 */ 4 - 1, /* 2 */ 4 - 1, /* 3 */ 4 - 2, +/* 4 */ 4 - 1, /* 5 */ 4 - 2, /* 6 */ 4 - 2, /* 7 */ 4 - 3, +/* 8 */ 4 - 1, /* 9 */ 4 - 2, /* a */ 4 - 2, /* b */ 4 - 3, +/* c */ 4 - 2, /* d */ 4 - 3, /* e */ 4 - 3, /* f */ 4 - 4, + +/* 0 */ 4 - 0, /* 1 */ 4 - 1, /* 2 */ 4 - 1, /* 3 */ 4 - 2, +/* 4 */ 4 - 1, /* 5 */ 4 - 2, /* 6 */ 4 - 2, /* 7 */ 4 - 3, +/* 8 */ 4 - 1, /* 9 */ 4 - 2, /* a */ 4 - 2, /* b */ 4 - 3, +/* c */ 4 - 2, /* d */ 4 - 
3, /* e */ 4 - 3, /* f */ 4 - 4); +const __m256i low_mask = _mm256_set1_epi8(0x0f); + +const __m256i lo = _mm256_and_si256(v, low_mask); +const __m256i hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), low_mask); +const __m256i popcnt1 = _mm256_shuffle_epi8(lookuppos, lo); +const __m256i popcnt2 = _mm256_shuffle_epi8(lookupneg, hi); +return _mm256_sad_epu8(popcnt1, popcnt2); +} +CROARING_UNTARGET_AVX2 + +CROARING_TARGET_AVX2 +/** + * Simple CSA over 256 bits + */ +static inline void CSA(__m256i *h, __m256i *l, __m256i a, __m256i b, +__m256i c) { +const __m256i u = _mm256_xor_si256(a, b); +*h = _mm256_or_si256(_mm256_and_si256(a, b), _mm256_and_si256(u, c)); +*l = _mm256_xor_si256(u, c); } +CROARING_UNTARGET_AVX2 + +CROARING_TARGET_AVX2 +/** + * Fast Harley-Seal AVX population count function + */ +inline static uint64_t avx2_harley_seal_popcount256(const __m256i *data, +const uint64_t size) { +__m256i total = _mm256_setzero_si256(); +__m256i ones = _mm256_setzero_si256(); +__m256i twos = _mm256_setzero_si256(); +__m256i fours = _mm256_setzero_si256(); +__m256i eights = _mm256_setzero_si256(); +__m256i sixteens = _mm256_setzero_si256(); +__m256i twosA, twosB, foursA, foursB, eightsA, eightsB; + +const uint64_t limit = size - size % 16; +uint64_t i = 0; + +for (; i < limit; i += 16) { +CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i), +_mm256_lddqu_si256(data + i + 1)); +CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 2), +_mm256_lddqu_si256(data + i + 3)); +CSA(&foursA, &twos, twos, twosA, twosB); +CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 4), +_mm256_lddqu_si256(data + i + 5)); +CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 6), +_mm256_lddqu_si256(data + i + 7)); +CSA(&foursB, &twos, twos, twosA, twosB); +CSA(&eightsA, &fours, fours, foursA, foursB); +CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 8), +_mm256_lddqu_si256(data + i + 9)); +CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 10), +_mm256_lddqu_si256(data + i + 
11)); +CSA(&foursA, &twos, twos, twosA, twosB); +CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 12), +_mm256_lddqu_si256(data + i + 13)); +CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 14), +_mm256_lddqu_si256(data + i + 15)); +CSA(&foursB, &twos, twos, twosA, twosB); +CSA(&eightsB, &fours, fours, foursA, foursB); +CSA(&sixteens, &eights, eights, eightsA, eightsB); + +total = _mm256_add_epi64(total, popcount256(sixteens)); +} + +total = _mm256_slli_epi64(total, 4); // * 16 +total = _mm256_add_epi64( +total, _mm256_slli_epi64(popcount256(eights), 3)); // += 8 * ... +total = _mm256_add_epi64( +total, _mm256_slli_epi64(popcount256(fours), 2)); // += 4 * ... +total = _mm256_add_epi64( +total, _mm256_slli_epi64(popcount256(twos), 1)); // += 2 * ... +total = _mm256_add_epi64(total, popcount256(ones)); +for (; i < size; i++) +total = +_mm256_add_epi64(total, popcount256(_mm256_lddqu_si256(data + i))); + +return (uint64_t)(_mm256_extract_epi64(total, 0)) + +(uint64_t)(_mm256_extract_epi64(total, 1)) + +(uint64_t)(_mm256_extract_epi64(total, 2)) + +(uint64_t)(_mm256_extract_epi64(total, 3)); +} +CROARING_UNTARGET_AVX2 + +#define AVXPOPCNTFNC(opname, avx_intrinsic) \ + static inline uint64_t avx2_harley_seal_popcount256_##opname( \ + const __m256i *data1, const __m256i *data2, const uint64_t size) { \ + __m256i total = _mm256_setzero_si256(); \ + __m256i ones = _mm256_setzero_si256(); \ + __m256i twos = _mm256_setzero_si256(); \ + __m256i fours = _mm256_setzero_si256(); \ + __m256i eights = _mm256_setzero_si256(); \ + __m256i sixteens = _mm256_setzero_si256(); \ + __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; \ + __m256i A1, A2; \ + const uint64_t limit = size - size % 16; \ + uint64_t i = 0; \ + for (; i < limit; i += 16) { \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ + _mm256_lddqu_si256(data2 + i)); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 1), \ + _mm256_lddqu_si256(data2 + i + 1)); \ + CSA(&twosA, &ones, ones, A1, A2); \ 
+ A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 2), \ + _mm256_lddqu_si256(data2 + i + 2)); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 3), \ + _mm256_lddqu_si256(data2 + i + 3)); \ + CSA(&twosB, &ones, ones, A1, A2); \ + CSA(&foursA, &twos, twos, twosA, twosB); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 4), \ + _mm256_lddqu_si256(data2 + i + 4)); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 5), \ + _mm256_lddqu_si256(data2 + i + 5)); \ + CSA(&twosA, &ones, ones, A1, A2); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 6), \ + _mm256_lddqu_si256(data2 + i + 6)); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 7), \ + _mm256_lddqu_si256(data2 + i + 7)); \ + CSA(&twosB, &ones, ones, A1, A2); \ + CSA(&foursB, &twos, twos, twosA, twosB); \ + CSA(&eightsA, &fours, fours, foursA, foursB); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 8), \ + _mm256_lddqu_si256(data2 + i + 8)); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 9), \ + _mm256_lddqu_si256(data2 + i + 9)); \ + CSA(&twosA, &ones, ones, A1, A2); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 10), \ + _mm256_lddqu_si256(data2 + i + 10)); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 11), \ + _mm256_lddqu_si256(data2 + i + 11)); \ + CSA(&twosB, &ones, ones, A1, A2); \ + CSA(&foursA, &twos, twos, twosA, twosB); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 12), \ + _mm256_lddqu_si256(data2 + i + 12)); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 13), \ + _mm256_lddqu_si256(data2 + i + 13)); \ + CSA(&twosA, &ones, ones, A1, A2); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 14), \ + _mm256_lddqu_si256(data2 + i + 14)); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 15), \ + _mm256_lddqu_si256(data2 + i + 15)); \ + CSA(&twosB, &ones, ones, A1, A2); \ + CSA(&foursB, &twos, twos, twosA, twosB); \ + CSA(&eightsB, &fours, fours, foursA, foursB); \ + CSA(&sixteens, &eights, eights, eightsA, eightsB); \ + total = 
_mm256_add_epi64(total, popcount256(sixteens)); \ + } \ + total = _mm256_slli_epi64(total, 4); \ + total = _mm256_add_epi64(total, \ + _mm256_slli_epi64(popcount256(eights), 3)); \ + total = \ + _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(fours), 2)); \ + total = \ + _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(twos), 1)); \ + total = _mm256_add_epi64(total, popcount256(ones)); \ + for (; i < size; i++) { \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ + _mm256_lddqu_si256(data2 + i)); \ + total = _mm256_add_epi64(total, popcount256(A1)); \ + } \ + return (uint64_t)(_mm256_extract_epi64(total, 0)) + \ + (uint64_t)(_mm256_extract_epi64(total, 1)) + \ + (uint64_t)(_mm256_extract_epi64(total, 2)) + \ + (uint64_t)(_mm256_extract_epi64(total, 3)); \ + } \ + static inline uint64_t avx2_harley_seal_popcount256andstore_##opname( \ + const __m256i *__restrict__ data1, const __m256i *__restrict__ data2, \ + __m256i *__restrict__ out, const uint64_t size) { \ + __m256i total = _mm256_setzero_si256(); \ + __m256i ones = _mm256_setzero_si256(); \ + __m256i twos = _mm256_setzero_si256(); \ + __m256i fours = _mm256_setzero_si256(); \ + __m256i eights = _mm256_setzero_si256(); \ + __m256i sixteens = _mm256_setzero_si256(); \ + __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; \ + __m256i A1, A2; \ + const uint64_t limit = size - size % 16; \ + uint64_t i = 0; \ + for (; i < limit; i += 16) { \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ + _mm256_lddqu_si256(data2 + i)); \ + _mm256_storeu_si256(out + i, A1); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 1), \ + _mm256_lddqu_si256(data2 + i + 1)); \ + _mm256_storeu_si256(out + i + 1, A2); \ + CSA(&twosA, &ones, ones, A1, A2); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 2), \ + _mm256_lddqu_si256(data2 + i + 2)); \ + _mm256_storeu_si256(out + i + 2, A1); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 3), \ + _mm256_lddqu_si256(data2 + i + 3)); \ + 
_mm256_storeu_si256(out + i + 3, A2); \ + CSA(&twosB, &ones, ones, A1, A2); \ + CSA(&foursA, &twos, twos, twosA, twosB); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 4), \ + _mm256_lddqu_si256(data2 + i + 4)); \ + _mm256_storeu_si256(out + i + 4, A1); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 5), \ + _mm256_lddqu_si256(data2 + i + 5)); \ + _mm256_storeu_si256(out + i + 5, A2); \ + CSA(&twosA, &ones, ones, A1, A2); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 6), \ + _mm256_lddqu_si256(data2 + i + 6)); \ + _mm256_storeu_si256(out + i + 6, A1); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 7), \ + _mm256_lddqu_si256(data2 + i + 7)); \ + _mm256_storeu_si256(out + i + 7, A2); \ + CSA(&twosB, &ones, ones, A1, A2); \ + CSA(&foursB, &twos, twos, twosA, twosB); \ + CSA(&eightsA, &fours, fours, foursA, foursB); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 8), \ + _mm256_lddqu_si256(data2 + i + 8)); \ + _mm256_storeu_si256(out + i + 8, A1); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 9), \ + _mm256_lddqu_si256(data2 + i + 9)); \ + _mm256_storeu_si256(out + i + 9, A2); \ + CSA(&twosA, &ones, ones, A1, A2); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 10), \ + _mm256_lddqu_si256(data2 + i + 10)); \ + _mm256_storeu_si256(out + i + 10, A1); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 11), \ + _mm256_lddqu_si256(data2 + i + 11)); \ + _mm256_storeu_si256(out + i + 11, A2); \ + CSA(&twosB, &ones, ones, A1, A2); \ + CSA(&foursA, &twos, twos, twosA, twosB); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 12), \ + _mm256_lddqu_si256(data2 + i + 12)); \ + _mm256_storeu_si256(out + i + 12, A1); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 13), \ + _mm256_lddqu_si256(data2 + i + 13)); \ + _mm256_storeu_si256(out + i + 13, A2); \ + CSA(&twosA, &ones, ones, A1, A2); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 14), \ + _mm256_lddqu_si256(data2 + i + 14)); \ + _mm256_storeu_si256(out + i + 
14, A1); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 15), \ + _mm256_lddqu_si256(data2 + i + 15)); \ + _mm256_storeu_si256(out + i + 15, A2); \ + CSA(&twosB, &ones, ones, A1, A2); \ + CSA(&foursB, &twos, twos, twosA, twosB); \ + CSA(&eightsB, &fours, fours, foursA, foursB); \ + CSA(&sixteens, &eights, eights, eightsA, eightsB); \ + total = _mm256_add_epi64(total, popcount256(sixteens)); \ + } \ + total = _mm256_slli_epi64(total, 4); \ + total = _mm256_add_epi64(total, \ + _mm256_slli_epi64(popcount256(eights), 3)); \ + total = \ + _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(fours), 2)); \ + total = \ + _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(twos), 1)); \ + total = _mm256_add_epi64(total, popcount256(ones)); \ + for (; i < size; i++) { \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ + _mm256_lddqu_si256(data2 + i)); \ + _mm256_storeu_si256(out + i, A1); \ + total = _mm256_add_epi64(total, popcount256(A1)); \ + } \ + return (uint64_t)(_mm256_extract_epi64(total, 0)) + \ + (uint64_t)(_mm256_extract_epi64(total, 1)) + \ + (uint64_t)(_mm256_extract_epi64(total, 2)) + \ + (uint64_t)(_mm256_extract_epi64(total, 3)); \ + } + +CROARING_TARGET_AVX2 +AVXPOPCNTFNC(or, _mm256_or_si256) +CROARING_UNTARGET_AVX2 + +CROARING_TARGET_AVX2 +AVXPOPCNTFNC(union, _mm256_or_si256) +CROARING_UNTARGET_AVX2 + +CROARING_TARGET_AVX2 +AVXPOPCNTFNC(and, _mm256_and_si256) +CROARING_UNTARGET_AVX2 + +CROARING_TARGET_AVX2 +AVXPOPCNTFNC(intersection, _mm256_and_si256) +CROARING_UNTARGET_AVX2 + +CROARING_TARGET_AVX2 +AVXPOPCNTFNC (xor, _mm256_xor_si256) +CROARING_UNTARGET_AVX2 + +CROARING_TARGET_AVX2 +AVXPOPCNTFNC(andnot, _mm256_andnot_si256) +CROARING_UNTARGET_AVX2 + + +#define VPOPCNT_AND_ADD(ptr, i, accu) \ + const __m512i v##i = _mm512_loadu_si512((const __m512i*)ptr + i); \ + const __m512i p##i = _mm512_popcnt_epi64(v##i); \ + accu = _mm512_add_epi64(accu, p##i); + +#if CROARING_COMPILER_SUPPORTS_AVX512 +CROARING_TARGET_AVX512 +static inline uint64_t 
sum_epu64_256(const __m256i v) { + +return (uint64_t)(_mm256_extract_epi64(v, 0)) ++ (uint64_t)(_mm256_extract_epi64(v, 1)) ++ (uint64_t)(_mm256_extract_epi64(v, 2)) ++ (uint64_t)(_mm256_extract_epi64(v, 3)); +} + + +static inline uint64_t simd_sum_epu64(const __m512i v) { + +__m256i lo = _mm512_extracti64x4_epi64(v, 0); +__m256i hi = _mm512_extracti64x4_epi64(v, 1); + +return sum_epu64_256(lo) + sum_epu64_256(hi); +} + +static inline uint64_t avx512_vpopcount(const __m512i* data, const uint64_t size) +{ +const uint64_t limit = size - size % 4; +__m512i total = _mm512_setzero_si512(); +uint64_t i = 0; + +for (; i < limit; i += 4) +{ +VPOPCNT_AND_ADD(data + i, 0, total); +VPOPCNT_AND_ADD(data + i, 1, total); +VPOPCNT_AND_ADD(data + i, 2, total); +VPOPCNT_AND_ADD(data + i, 3, total); +} + +for (; i < size; i++) +{ +total = _mm512_add_epi64(total, _mm512_popcnt_epi64(_mm512_loadu_si512(data + i))); +} + +return simd_sum_epu64(total); +} +CROARING_UNTARGET_AVX512 +#endif +#define AVXPOPCNTFNC512(opname, avx_intrinsic) \ + static inline uint64_t avx512_harley_seal_popcount512_##opname( \ + const __m512i *data1, const __m512i *data2, const uint64_t size) { \ + __m512i total = _mm512_setzero_si512(); \ + const uint64_t limit = size - size % 4; \ + uint64_t i = 0; \ + for (; i < limit; i += 4) { \ + __m512i a1 = avx_intrinsic(_mm512_loadu_si512(data1 + i), \ + _mm512_loadu_si512(data2 + i)); \ + total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a1)); \ + __m512i a2 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 1), \ + _mm512_loadu_si512(data2 + i + 1)); \ + total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a2)); \ + __m512i a3 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 2), \ + _mm512_loadu_si512(data2 + i + 2)); \ + total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a3)); \ + __m512i a4 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 3), \ + _mm512_loadu_si512(data2 + i + 3)); \ + total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a4)); \ + } \ + for(; i < size; 
i++) { \ + __m512i a = avx_intrinsic(_mm512_loadu_si512(data1 + i), \ + _mm512_loadu_si512(data2 + i)); \ + total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a)); \ + } \ + return simd_sum_epu64(total); \ + } \ + static inline uint64_t avx512_harley_seal_popcount512andstore_##opname( \ + const __m512i *__restrict__ data1, const __m512i *__restrict__ data2, \ + __m512i *__restrict__ out, const uint64_t size) { \ + __m512i total = _mm512_setzero_si512(); \ + const uint64_t limit = size - size % 4; \ + uint64_t i = 0; \ + for (; i < limit; i += 4) { \ + __m512i a1 = avx_intrinsic(_mm512_loadu_si512(data1 + i), \ + _mm512_loadu_si512(data2 + i)); \ + _mm512_storeu_si512(out + i, a1); \ + total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a1)); \ + __m512i a2 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 1), \ + _mm512_loadu_si512(data2 + i + 1)); \ + _mm512_storeu_si512(out + i + 1, a2); \ + total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a2)); \ + __m512i a3 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 2), \ + _mm512_loadu_si512(data2 + i + 2)); \ + _mm512_storeu_si512(out + i + 2, a3); \ + total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a3)); \ + __m512i a4 = avx_intrinsic(_mm512_loadu_si512(data1 + i + 3), \ + _mm512_loadu_si512(data2 + i + 3)); \ + _mm512_storeu_si512(out + i + 3, a4); \ + total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a4)); \ + } \ + for(; i < size; i++) { \ + __m512i a = avx_intrinsic(_mm512_loadu_si512(data1 + i), \ + _mm512_loadu_si512(data2 + i)); \ + _mm512_storeu_si512(out + i, a); \ + total = _mm512_add_epi64(total, _mm512_popcnt_epi64(a)); \ + } \ + return simd_sum_epu64(total); \ + } \ + +#if CROARING_COMPILER_SUPPORTS_AVX512 +CROARING_TARGET_AVX512 +AVXPOPCNTFNC512(or, _mm512_or_si512) +AVXPOPCNTFNC512(union, _mm512_or_si512) +AVXPOPCNTFNC512(and, _mm512_and_si512) +AVXPOPCNTFNC512(intersection, _mm512_and_si512) +AVXPOPCNTFNC512(xor, _mm512_xor_si512) +AVXPOPCNTFNC512(andnot, _mm512_andnot_si512) 
+CROARING_UNTARGET_AVX512 #endif +/*** + * END Harley-Seal popcount functions. + */ -/* flip specified bits */ -/* TODO: consider whether worthwhile to make an asm version */ +#endif // CROARING_IS_X64 -uint64_t bitset_flip_list_withcard(void *bitset, uint64_t card, - const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = ((uint64_t *)bitset)[offset]; - newload = load ^ (UINT64_C(1) << index); - // todo: is a branch here all that bad? - card += - (1 - 2 * (((UINT64_C(1) << index) & load) >> index)); // +1 or -1 - ((uint64_t *)bitset)[offset] = newload; - list++; - } - return card; -} - -void bitset_flip_list(void *bitset, const uint16_t *list, uint64_t length) { - uint64_t offset, load, newload, pos, index; - const uint16_t *end = list + length; - while (list != end) { - pos = *(const uint16_t *)list; - offset = pos >> 6; - index = pos % 64; - load = ((uint64_t *)bitset)[offset]; - newload = load ^ (UINT64_C(1) << index); - ((uint64_t *)bitset)[offset] = newload; - list++; - } -} -/* end file src/bitset_util.c */ -/* begin file src/containers/array.c */ +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal +#endif + +#endif +/* end file include/roaring/bitset_util.h */ +/* begin file include/roaring/containers/array.h */ /* - * array.c + * array.h * */ -#include -#include -#include +#ifndef INCLUDE_CONTAINERS_ARRAY_H_ +#define INCLUDE_CONTAINERS_ARRAY_H_ -extern inline uint16_t array_container_minimum(const array_container_t *arr); -extern inline uint16_t array_container_maximum(const array_container_t *arr); -extern inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x); +#include -extern inline int array_container_rank(const array_container_t *arr, - uint16_t x); -extern inline bool array_container_contains(const 
array_container_t *arr, - uint16_t pos); -extern inline int array_container_cardinality(const array_container_t *array); -extern inline bool array_container_nonzero_cardinality(const array_container_t *array); -extern inline void array_container_clear(array_container_t *array); -extern inline int32_t array_container_serialized_size_in_bytes(int32_t card); -extern inline bool array_container_empty(const array_container_t *array); -extern inline bool array_container_full(const array_container_t *array); -/* Create a new array with capacity size. Return NULL in case of failure. */ -array_container_t *array_container_create_given_capacity(int32_t size) { - array_container_t *container; - if ((container = (array_container_t *)malloc(sizeof(array_container_t))) == - NULL) { - return NULL; - } +#ifdef __cplusplus +extern "C" { namespace roaring { - if( size <= 0 ) { // we don't want to rely on malloc(0) - container->array = NULL; - } else if ((container->array = (uint16_t *)malloc(sizeof(uint16_t) * size)) == - NULL) { - free(container); - return NULL; - } +// Note: in pure C++ code, you should avoid putting `using` in header files +using api::roaring_iterator; +using api::roaring_iterator64; - container->capacity = size; - container->cardinality = 0; +namespace internal { +#endif - return container; -} +/* Containers with DEFAULT_MAX_SIZE or less integers should be arrays */ +enum { DEFAULT_MAX_SIZE = 4096 }; -/* Create a new array. Return NULL in case of failure. 
*/ -array_container_t *array_container_create() { - return array_container_create_given_capacity(ARRAY_DEFAULT_INIT_SIZE); -} +/* struct array_container - sparse representation of a bitmap + * + * @cardinality: number of indices in `array` (and the bitmap) + * @capacity: allocated size of `array` + * @array: sorted list of integers + */ +STRUCT_CONTAINER(array_container_s) { +int32_t cardinality; +int32_t capacity; +uint16_t *array; +}; + +typedef struct array_container_s array_container_t; + +#define CAST_array(c) CAST(array_container_t *, c) // safer downcast +#define const_CAST_array(c) CAST(const array_container_t *, c) +#define movable_CAST_array(c) movable_CAST(array_container_t **, c) + +/* Create a new array with default. Return NULL in case of failure. See also + * array_container_create_given_capacity. */ +array_container_t *array_container_create(void); + +/* Create a new array with a specified capacity size. Return NULL in case of + * failure. */ +array_container_t *array_container_create_given_capacity(int32_t size); /* Create a new array containing all values in [min,max). */ -array_container_t * array_container_create_range(uint32_t min, uint32_t max) { - array_container_t * answer = array_container_create_given_capacity(max - min + 1); - if(answer == NULL) return answer; - answer->cardinality = 0; - for(uint32_t k = min; k < max; k++) { - answer->array[answer->cardinality++] = k; - } - return answer; -} +array_container_t * array_container_create_range(uint32_t min, uint32_t max); -/* Duplicate container */ -array_container_t *array_container_clone(const array_container_t *src) { - array_container_t *newcontainer = - array_container_create_given_capacity(src->capacity); - if (newcontainer == NULL) return NULL; +/* + * Shrink the capacity to the actual size, return the number of bytes saved. + */ +int array_container_shrink_to_fit(array_container_t *src); - newcontainer->cardinality = src->cardinality; +/* Free memory owned by `array'. 
*/ +void array_container_free(array_container_t *array); - memcpy(newcontainer->array, src->array, - src->cardinality * sizeof(uint16_t)); +/* Duplicate container */ +array_container_t *array_container_clone(const array_container_t *src); - return newcontainer; +/* Get the cardinality of `array'. */ +ALLOW_UNALIGNED +static inline int array_container_cardinality(const array_container_t *array) { +return array->cardinality; } -int array_container_shrink_to_fit(array_container_t *src) { - if (src->cardinality == src->capacity) return 0; // nothing to do - int savings = src->capacity - src->cardinality; - src->capacity = src->cardinality; - if( src->capacity == 0) { // we do not want to rely on realloc for zero allocs - free(src->array); - src->array = NULL; - } else { - uint16_t *oldarray = src->array; - src->array = - (uint16_t *)realloc(oldarray, src->capacity * sizeof(uint16_t)); - if (src->array == NULL) free(oldarray); // should never happen? - } - return savings; +static inline bool array_container_nonzero_cardinality( +const array_container_t *array) { +return array->cardinality > 0; } -/* Free memory. */ -void array_container_free(array_container_t *arr) { - if(arr->array != NULL) {// Jon Strabala reports that some tools complain otherwise - free(arr->array); - arr->array = NULL; // pedantic - } - free(arr); +/* Copy one container into another. We assume that they are distinct. */ +void array_container_copy(const array_container_t *src, array_container_t *dst); + +/* Add all the values in [min,max) (included) at a distance k*step from min. + The container must have a size less or equal to DEFAULT_MAX_SIZE after this + addition. */ +void array_container_add_from_range(array_container_t *arr, uint32_t min, +uint32_t max, uint16_t step); + + +static inline bool array_container_empty(const array_container_t *array) { +return array->cardinality == 0; } -static inline int32_t grow_capacity(int32_t capacity) { - return (capacity <= 0) ? 
ARRAY_DEFAULT_INIT_SIZE - : capacity < 64 ? capacity * 2 - : capacity < 1024 ? capacity * 3 / 2 - : capacity * 5 / 4; +/* check whether the cardinality is equal to the capacity (this does not mean +* that it contains 1<<16 elements) */ +static inline bool array_container_full(const array_container_t *array) { +return array->cardinality == array->capacity; } -static inline int32_t clamp(int32_t val, int32_t min, int32_t max) { - return ((val < min) ? min : (val > max) ? max : val); + +/* Compute the union of `src_1' and `src_2' and write the result to `dst' + * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ +void array_container_union(const array_container_t *src_1, +const array_container_t *src_2, +array_container_t *dst); + +/* symmetric difference, see array_container_union */ +void array_container_xor(const array_container_t *array_1, +const array_container_t *array_2, +array_container_t *out); + +/* Computes the intersection of src_1 and src_2 and write the result to + * dst. It is assumed that dst is distinct from both src_1 and src_2. */ +void array_container_intersection(const array_container_t *src_1, +const array_container_t *src_2, +array_container_t *dst); + +/* Check whether src_1 and src_2 intersect. */ +bool array_container_intersect(const array_container_t *src_1, +const array_container_t *src_2); + + +/* computers the size of the intersection between two arrays. + */ +int array_container_intersection_cardinality(const array_container_t *src_1, +const array_container_t *src_2); + +/* computes the intersection of array1 and array2 and write the result to + * array1. + * */ +void array_container_intersection_inplace(array_container_t *src_1, +const array_container_t *src_2); + +/* + * Write out the 16-bit integers contained in this container as a list of 32-bit + * integers using base + * as the starting value (it might be expected that base has zeros in its 16 + * least significant bits). 
+ * The function returns the number of values written. + * The caller is responsible for allocating enough memory in out. + */ +int array_container_to_uint32_array(void *vout, const array_container_t *cont, +uint32_t base); + +/* Compute the number of runs */ +int32_t array_container_number_of_runs(const array_container_t *ac); + +/* + * Print this container using printf (useful for debugging). + */ +void array_container_printf(const array_container_t *v); + +/* + * Print this container using printf as a comma-separated list of 32-bit + * integers starting at base. + */ +void array_container_printf_as_uint32_array(const array_container_t *v, +uint32_t base); + +bool array_container_validate(const array_container_t *v, const char **reason); + +/** + * Return the serialized size in bytes of a container having cardinality "card". + */ +static inline int32_t array_container_serialized_size_in_bytes(int32_t card) { +return card * 2 + 2; } +/** + * Increase capacity to at least min. + * Whether the existing data needs to be copied over depends on the "preserve" + * parameter. If preserve is false, then the new content will be uninitialized, + * otherwise the old content is copied. + */ void array_container_grow(array_container_t *container, int32_t min, - bool preserve) { - - int32_t max = (min <= DEFAULT_MAX_SIZE ? 
DEFAULT_MAX_SIZE : 65536); - int32_t new_capacity = clamp(grow_capacity(container->capacity), min, max); - - container->capacity = new_capacity; - uint16_t *array = container->array; - - if (preserve) { - container->array = - (uint16_t *)realloc(array, new_capacity * sizeof(uint16_t)); - if (container->array == NULL) free(array); - } else { - // Jon Strabala reports that some tools complain otherwise - if (array != NULL) { - free(array); - } - container->array = (uint16_t *)malloc(new_capacity * sizeof(uint16_t)); - } +bool preserve); - // handle the case where realloc fails - if (container->array == NULL) { - fprintf(stderr, "could not allocate memory\n"); - } - assert(container->array != NULL); -} +bool array_container_iterate(const array_container_t *cont, uint32_t base, +roaring_iterator iterator, void *ptr); +bool array_container_iterate64(const array_container_t *cont, uint32_t base, +roaring_iterator64 iterator, uint64_t high_bits, +void *ptr); -/* Copy one container into another. We assume that they are distinct. */ -void array_container_copy(const array_container_t *src, - array_container_t *dst) { - const int32_t cardinality = src->cardinality; - if (cardinality > dst->capacity) { - array_container_grow(dst, cardinality, false); - } +/** + * Writes the underlying array to buf, outputs how many bytes were written. + * This is meant to be byte-by-byte compatible with the Java and Go versions of + * Roaring. + * The number of bytes written should be + * array_container_size_in_bytes(container). + * + */ +int32_t array_container_write(const array_container_t *container, char *buf); +/** + * Reads the instance from buf, outputs how many bytes were read. + * This is meant to be byte-by-byte compatible with the Java and Go versions of + * Roaring. + * The number of bytes read should be array_container_size_in_bytes(container). + * You need to provide the (known) cardinality. 
+ */ +int32_t array_container_read(int32_t cardinality, array_container_t *container, +const char *buf); - dst->cardinality = cardinality; - memcpy(dst->array, src->array, cardinality * sizeof(uint16_t)); +/** + * Return the serialized size in bytes of a container (see + * bitset_container_write) + * This is meant to be compatible with the Java and Go versions of Roaring and + * assumes + * that the cardinality of the container is already known. + * + */ +static inline int32_t array_container_size_in_bytes( +const array_container_t *container) { +return container->cardinality * sizeof(uint16_t); } -void array_container_add_from_range(array_container_t *arr, uint32_t min, - uint32_t max, uint16_t step) { - for (uint32_t value = min; value < max; value += step) { - array_container_append(arr, value); - } +/** + * Return true if the two arrays have the same content. + */ +ALLOW_UNALIGNED +static inline bool array_container_equals( +const array_container_t *container1, +const array_container_t *container2) { + +if (container1->cardinality != container2->cardinality) { +return false; +} +return memequals(container1->array, container2->array, container1->cardinality*2); } -/* Computes the union of array1 and array2 and write the result to arrayout. - * It is assumed that arrayout is distinct from both array1 and array2. +/** + * Return true if container1 is a subset of container2. 
*/ -void array_container_union(const array_container_t *array_1, - const array_container_t *array_2, - array_container_t *out) { - const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality; - const int32_t max_cardinality = card_1 + card_2; - - if (out->capacity < max_cardinality) { - array_container_grow(out, max_cardinality, false); - } - out->cardinality = (int32_t)fast_union_uint16(array_1->array, card_1, - array_2->array, card_2, out->array); +bool array_container_is_subset(const array_container_t *container1, +const array_container_t *container2); +/** + * If the element of given rank is in this container, supposing that the first + * element has rank start_rank, then the function returns true and sets element + * accordingly. + * Otherwise, it returns false and update start_rank. + */ +static inline bool array_container_select(const array_container_t *container, +uint32_t *start_rank, uint32_t rank, +uint32_t *element) { +int card = array_container_cardinality(container); +if (*start_rank + card <= rank) { +*start_rank += card; +return false; +} else { +*element = container->array[rank - *start_rank]; +return true; +} } /* Computes the difference of array1 and array2 and write the result @@ -3072,6908 +1574,17710 @@ void array_container_union(const array_container_t *array_1, * Array out does not need to be distinct from array_1 */ void array_container_andnot(const array_container_t *array_1, - const array_container_t *array_2, - array_container_t *out) { - if (out->capacity < array_1->cardinality) - array_container_grow(out, array_1->cardinality, false); -#ifdef ROARING_VECTOR_OPERATIONS_ENABLED - if((out != array_1) && (out != array_2)) { - out->cardinality = - difference_vector16(array_1->array, array_1->cardinality, - array_2->array, array_2->cardinality, out->array); - } else { - out->cardinality = - difference_uint16(array_1->array, array_1->cardinality, array_2->array, - array_2->cardinality, out->array); - } -#else - out->cardinality 
= - difference_uint16(array_1->array, array_1->cardinality, array_2->array, - array_2->cardinality, out->array); -#endif -} +const array_container_t *array_2, +array_container_t *out); -/* Computes the symmetric difference of array1 and array2 and write the - * result - * to arrayout. - * It is assumed that arrayout is distinct from both array1 and array2. - */ -void array_container_xor(const array_container_t *array_1, - const array_container_t *array_2, - array_container_t *out) { - const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality; - const int32_t max_cardinality = card_1 + card_2; - if (out->capacity < max_cardinality) { - array_container_grow(out, max_cardinality, false); - } +/* Append x to the set. Assumes that the value is larger than any preceding + * values. */ +static inline void array_container_append(array_container_t *arr, +uint16_t pos) { +const int32_t capacity = arr->capacity; -#ifdef ROARING_VECTOR_OPERATIONS_ENABLED - out->cardinality = - xor_vector16(array_1->array, array_1->cardinality, array_2->array, - array_2->cardinality, out->array); -#else - out->cardinality = - xor_uint16(array_1->array, array_1->cardinality, array_2->array, - array_2->cardinality, out->array); -#endif +if (array_container_full(arr)) { +array_container_grow(arr, capacity + 1, true); } -static inline int32_t minimum_int32(int32_t a, int32_t b) { - return (a < b) ? a : b; +arr->array[arr->cardinality++] = pos; } -/* computes the intersection of array1 and array2 and write the result to - * arrayout. - * It is assumed that arrayout is distinct from both array1 and array2. 
- * */ -void array_container_intersection(const array_container_t *array1, - const array_container_t *array2, - array_container_t *out) { - int32_t card_1 = array1->cardinality, card_2 = array2->cardinality, - min_card = minimum_int32(card_1, card_2); - const int threshold = 64; // subject to tuning -#ifdef USEAVX - if (out->capacity < min_card) { - array_container_grow(out, min_card + sizeof(__m128i) / sizeof(uint16_t), - false); - } -#else - if (out->capacity < min_card) { - array_container_grow(out, min_card, false); - } -#endif +/** + * Add value to the set if final cardinality doesn't exceed max_cardinality. + * Return code: + * 1 -- value was added + * 0 -- value was already present + * -1 -- value was not added because cardinality would exceed max_cardinality + */ +static inline int array_container_try_add(array_container_t *arr, uint16_t value, +int32_t max_cardinality) { +const int32_t cardinality = arr->cardinality; - if (card_1 * threshold < card_2) { - out->cardinality = intersect_skewed_uint16( - array1->array, card_1, array2->array, card_2, out->array); - } else if (card_2 * threshold < card_1) { - out->cardinality = intersect_skewed_uint16( - array2->array, card_2, array1->array, card_1, out->array); - } else { -#ifdef USEAVX - out->cardinality = intersect_vector16( - array1->array, card_1, array2->array, card_2, out->array); -#else - out->cardinality = intersect_uint16(array1->array, card_1, - array2->array, card_2, out->array); -#endif - } +// best case, we can append. 
+if ((array_container_empty(arr) || arr->array[cardinality - 1] < value) && +cardinality < max_cardinality) { +array_container_append(arr, value); +return 1; } -/* computes the size of the intersection of array1 and array2 - * */ -int array_container_intersection_cardinality(const array_container_t *array1, - const array_container_t *array2) { - int32_t card_1 = array1->cardinality, card_2 = array2->cardinality; - const int threshold = 64; // subject to tuning - if (card_1 * threshold < card_2) { - return intersect_skewed_uint16_cardinality(array1->array, card_1, - array2->array, card_2); - } else if (card_2 * threshold < card_1) { - return intersect_skewed_uint16_cardinality(array2->array, card_2, - array1->array, card_1); - } else { -#ifdef USEAVX - return intersect_vector16_cardinality(array1->array, card_1, - array2->array, card_2); -#else - return intersect_uint16_cardinality(array1->array, card_1, - array2->array, card_2); -#endif - } +const int32_t loc = binarySearch(arr->array, cardinality, value); + +if (loc >= 0) { +return 0; +} else if (cardinality < max_cardinality) { +if (array_container_full(arr)) { +array_container_grow(arr, arr->capacity + 1, true); +} +const int32_t insert_idx = -loc - 1; +memmove(arr->array + insert_idx + 1, arr->array + insert_idx, +(cardinality - insert_idx) * sizeof(uint16_t)); +arr->array[insert_idx] = value; +arr->cardinality++; +return 1; +} else { +return -1; +} } -bool array_container_intersect(const array_container_t *array1, - const array_container_t *array2) { - int32_t card_1 = array1->cardinality, card_2 = array2->cardinality; - const int threshold = 64; // subject to tuning - if (card_1 * threshold < card_2) { - return intersect_skewed_uint16_nonempty( - array1->array, card_1, array2->array, card_2); - } else if (card_2 * threshold < card_1) { - return intersect_skewed_uint16_nonempty( - array2->array, card_2, array1->array, card_1); - } else { - // we do not bother vectorizing - return 
intersect_uint16_nonempty(array1->array, card_1, - array2->array, card_2); - } +/* Add value to the set. Returns true if x was not already present. */ +static inline bool array_container_add(array_container_t *arr, uint16_t value) { +return array_container_try_add(arr, value, INT32_MAX) == 1; } -/* computes the intersection of array1 and array2 and write the result to - * array1. - * */ -void array_container_intersection_inplace(array_container_t *src_1, - const array_container_t *src_2) { - // todo: can any of this be vectorized? - int32_t card_1 = src_1->cardinality, card_2 = src_2->cardinality; - const int threshold = 64; // subject to tuning - if (card_1 * threshold < card_2) { - src_1->cardinality = intersect_skewed_uint16( - src_1->array, card_1, src_2->array, card_2, src_1->array); - } else if (card_2 * threshold < card_1) { - src_1->cardinality = intersect_skewed_uint16( - src_2->array, card_2, src_1->array, card_1, src_1->array); - } else { - src_1->cardinality = intersect_uint16( - src_1->array, card_1, src_2->array, card_2, src_1->array); - } +/* Remove x from the set. Returns true if x was present. 
*/ +static inline bool array_container_remove(array_container_t *arr, +uint16_t pos) { +const int32_t idx = binarySearch(arr->array, arr->cardinality, pos); +const bool is_present = idx >= 0; +if (is_present) { +memmove(arr->array + idx, arr->array + idx + 1, +(arr->cardinality - idx - 1) * sizeof(uint16_t)); +arr->cardinality--; } -int array_container_to_uint32_array(void *vout, const array_container_t *cont, - uint32_t base) { - int outpos = 0; - uint32_t *out = (uint32_t *)vout; - for (int i = 0; i < cont->cardinality; ++i) { - const uint32_t val = base + cont->array[i]; - memcpy(out + outpos, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - outpos++; - } - return outpos; +return is_present; } -void array_container_printf(const array_container_t *v) { - if (v->cardinality == 0) { - printf("{}"); - return; - } - printf("{"); - printf("%d", v->array[0]); - for (int i = 1; i < v->cardinality; ++i) { - printf(",%d", v->array[i]); - } - printf("}"); +/* Check whether x is present. 
*/ +inline bool array_container_contains(const array_container_t *arr, +uint16_t pos) { +// return binarySearch(arr->array, arr->cardinality, pos) >= 0; +// binary search with fallback to linear search for short ranges +int32_t low = 0; +const uint16_t * carr = (const uint16_t *) arr->array; +int32_t high = arr->cardinality - 1; +// while (high - low >= 0) { +while(high >= low + 16) { +int32_t middleIndex = (low + high)>>1; +uint16_t middleValue = carr[middleIndex]; +if (middleValue < pos) { +low = middleIndex + 1; +} else if (middleValue > pos) { +high = middleIndex - 1; +} else { +return true; +} } -void array_container_printf_as_uint32_array(const array_container_t *v, - uint32_t base) { - if (v->cardinality == 0) { - return; - } - printf("%u", v->array[0] + base); - for (int i = 1; i < v->cardinality; ++i) { - printf(",%u", v->array[i] + base); - } +for (int i=low; i <= high; i++) { +uint16_t v = carr[i]; +if (v == pos) { +return true; +} +if ( v > pos ) return false; } +return false; -/* Compute the number of runs */ -int32_t array_container_number_of_runs(const array_container_t *a) { - // Can SIMD work here? - int32_t nr_runs = 0; - int32_t prev = -2; - for (const uint16_t *p = a->array; p != a->array + a->cardinality; ++p) { - if (*p != prev + 1) nr_runs++; - prev = *p; - } - return nr_runs; } -int32_t array_container_serialize(const array_container_t *container, char *buf) { - int32_t l, off; - uint16_t cardinality = (uint16_t)container->cardinality; +void array_container_offset(const array_container_t *c, +container_t **loc, container_t **hic, +uint16_t offset); - memcpy(buf, &cardinality, off = sizeof(cardinality)); - l = sizeof(uint16_t) * container->cardinality; - if (l) memcpy(&buf[off], container->array, l); +//* Check whether a range of values from range_start (included) to range_end (excluded) is present. 
*/ +static inline bool array_container_contains_range(const array_container_t *arr, +uint32_t range_start, uint32_t range_end) { +const int32_t range_count = range_end - range_start; +const uint16_t rs_included = range_start; +const uint16_t re_included = range_end - 1; - return (off + l); +// Empty range is always included +if (range_count <= 0) { +return true; +} +if (range_count > arr->cardinality) { +return false; } -/** - * Writes the underlying array to buf, outputs how many bytes were written. - * The number of bytes written should be - * array_container_size_in_bytes(container). - * - */ -int32_t array_container_write(const array_container_t *container, char *buf) { - memcpy(buf, container->array, container->cardinality * sizeof(uint16_t)); - return array_container_size_in_bytes(container); +const int32_t start = binarySearch(arr->array, arr->cardinality, rs_included); +// If this sorted array contains all items in the range: +// * the start item must be found +// * the last item in range range_count must exist, and be the expected end value +return (start >= 0) && (arr->cardinality >= start + range_count) && +(arr->array[start + range_count - 1] == re_included); } -bool array_container_is_subset(const array_container_t *container1, - const array_container_t *container2) { - if (container1->cardinality > container2->cardinality) { - return false; - } - int i1 = 0, i2 = 0; - while (i1 < container1->cardinality && i2 < container2->cardinality) { - if (container1->array[i1] == container2->array[i2]) { - i1++; - i2++; - } else if (container1->array[i1] > container2->array[i2]) { - i2++; - } else { // container1->array[i1] < container2->array[i2] - return false; - } - } - if (i1 == container1->cardinality) { - return true; - } else { - return false; - } +/* Returns the smallest value (assumes not empty) */ +inline uint16_t array_container_minimum(const array_container_t *arr) { +if (arr->cardinality == 0) return 0; +return arr->array[0]; } -int32_t 
array_container_read(int32_t cardinality, array_container_t *container, - const char *buf) { - if (container->capacity < cardinality) { - array_container_grow(container, cardinality, false); - } - container->cardinality = cardinality; - memcpy(container->array, buf, container->cardinality * sizeof(uint16_t)); +/* Returns the largest value (assumes not empty) */ +inline uint16_t array_container_maximum(const array_container_t *arr) { +if (arr->cardinality == 0) return 0; +return arr->array[arr->cardinality - 1]; +} + +/* Returns the number of values equal or smaller than x */ +inline int array_container_rank(const array_container_t *arr, uint16_t x) { +const int32_t idx = binarySearch(arr->array, arr->cardinality, x); +const bool is_present = idx >= 0; +if (is_present) { +return idx + 1; +} else { +return -idx - 1; +} +} - return array_container_size_in_bytes(container); +/* Returns the index of x , if not exsist return -1 */ +inline int array_container_get_index(const array_container_t *arr, uint16_t x) { +const int32_t idx = binarySearch(arr->array, arr->cardinality, x); +const bool is_present = idx >= 0; +if (is_present) { +return idx; +} else { +return -1; +} } -uint32_t array_container_serialization_len(const array_container_t *container) { - return (sizeof(uint16_t) /* container->cardinality converted to 16 bit */ + - (sizeof(uint16_t) * container->cardinality)); +/* Returns the index of the first value equal or larger than x, or -1 */ +inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x) { +const int32_t idx = binarySearch(arr->array, arr->cardinality, x); +const bool is_present = idx >= 0; +if (is_present) { +return idx; +} else { +int32_t candidate = - idx - 1; +if(candidate < arr->cardinality) return candidate; +return -1; +} } -void *array_container_deserialize(const char *buf, size_t buf_len) { - array_container_t *ptr; +/* + * Adds all values in range [min,max] using hint: + * nvals_less is the number of array values 
less than $min + * nvals_greater is the number of array values greater than $max + */ +static inline void array_container_add_range_nvals(array_container_t *array, +uint32_t min, uint32_t max, +int32_t nvals_less, +int32_t nvals_greater) { +int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater; +if (union_cardinality > array->capacity) { +array_container_grow(array, union_cardinality, true); +} +memmove(&(array->array[union_cardinality - nvals_greater]), +&(array->array[array->cardinality - nvals_greater]), +nvals_greater * sizeof(uint16_t)); +for (uint32_t i = 0; i <= max - min; i++) { +array->array[nvals_less + i] = min + i; +} +array->cardinality = union_cardinality; +} - if (buf_len < 2) /* capacity converted to 16 bit */ - return (NULL); - else - buf_len -= 2; +/** + * Adds all values in range [min,max]. This function is currently unused + * and left as a documentation. + */ +/*static inline void array_container_add_range(array_container_t *array, + uint32_t min, uint32_t max) { + int32_t nvals_greater = count_greater(array->array, array->cardinality, max); + int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min); + array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater); +}*/ - if ((ptr = (array_container_t *)malloc(sizeof(array_container_t))) != - NULL) { - size_t len; - int32_t off; - uint16_t cardinality; +/* + * Removes all elements array[pos] .. 
array[pos+count-1] + */ +static inline void array_container_remove_range(array_container_t *array, +uint32_t pos, uint32_t count) { +if (count != 0) { +memmove(&(array->array[pos]), &(array->array[pos+count]), +(array->cardinality - pos - count) * sizeof(uint16_t)); +array->cardinality -= count; +} +} - memcpy(&cardinality, buf, off = sizeof(cardinality)); +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif - ptr->capacity = ptr->cardinality = (uint32_t)cardinality; - len = sizeof(uint16_t) * ptr->cardinality; +#endif /* INCLUDE_CONTAINERS_ARRAY_H_ */ +/* end file include/roaring/containers/array.h */ +/* begin file include/roaring/containers/bitset.h */ +/* + * bitset.h + * + */ - if (len != buf_len) { - free(ptr); - return (NULL); - } +#ifndef INCLUDE_CONTAINERS_BITSET_H_ +#define INCLUDE_CONTAINERS_BITSET_H_ - if ((ptr->array = (uint16_t *)malloc(sizeof(uint16_t) * - ptr->capacity)) == NULL) { - free(ptr); - return (NULL); - } +#include +#include - if (len) memcpy(ptr->array, &buf[off], len); - /* Check if returned values are monotonically increasing */ - for (int32_t i = 0, j = 0; i < ptr->cardinality; i++) { - if (ptr->array[i] < j) { - free(ptr->array); - free(ptr); - return (NULL); - } else - j = ptr->array[i]; - } - } - return (ptr); -} +#ifdef __cplusplus +extern "C" { namespace roaring { -bool array_container_iterate(const array_container_t *cont, uint32_t base, - roaring_iterator iterator, void *ptr) { - for (int i = 0; i < cont->cardinality; i++) - if (!iterator(cont->array[i] + base, ptr)) return false; - return true; -} +// Note: in pure C++ code, you should avoid putting `using` in header files +using api::roaring_iterator; +using api::roaring_iterator64; -bool array_container_iterate64(const array_container_t *cont, uint32_t base, - roaring_iterator64 iterator, uint64_t high_bits, - void *ptr) { - for (int i = 0; i < cont->cardinality; i++) - if (!iterator(high_bits | (uint64_t)(cont->array[i] + base), ptr)) - 
return false; - return true; -} -/* end file src/containers/array.c */ -/* begin file src/containers/bitset.c */ -/* - * bitset.c - * - */ -#ifndef _POSIX_C_SOURCE -#define _POSIX_C_SOURCE 200809L +namespace internal { #endif -#include -#include -#include -#include -extern inline int bitset_container_cardinality(const bitset_container_t *bitset); -extern inline bool bitset_container_nonzero_cardinality(bitset_container_t *bitset); -extern inline void bitset_container_set(bitset_container_t *bitset, uint16_t pos); -extern inline void bitset_container_unset(bitset_container_t *bitset, uint16_t pos); -extern inline bool bitset_container_get(const bitset_container_t *bitset, - uint16_t pos); -extern inline int32_t bitset_container_serialized_size_in_bytes(void); -extern inline bool bitset_container_add(bitset_container_t *bitset, uint16_t pos); -extern inline bool bitset_container_remove(bitset_container_t *bitset, uint16_t pos); -extern inline bool bitset_container_contains(const bitset_container_t *bitset, - uint16_t pos); -void bitset_container_clear(bitset_container_t *bitset) { - memset(bitset->array, 0, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - bitset->cardinality = 0; -} +enum { +BITSET_CONTAINER_SIZE_IN_WORDS = (1 << 16) / 64, +BITSET_UNKNOWN_CARDINALITY = -1 +}; -void bitset_container_set_all(bitset_container_t *bitset) { - memset(bitset->array, INT64_C(-1), - sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - bitset->cardinality = (1 << 16); -} +STRUCT_CONTAINER(bitset_container_s) { +int32_t cardinality; +uint64_t *words; +}; +typedef struct bitset_container_s bitset_container_t; +#define CAST_bitset(c) CAST(bitset_container_t *, c) // safer downcast +#define const_CAST_bitset(c) CAST(const bitset_container_t *, c) +#define movable_CAST_bitset(c) movable_CAST(bitset_container_t **, c) /* Create a new bitset. Return NULL in case of failure. 
*/ -bitset_container_t *bitset_container_create(void) { - bitset_container_t *bitset = - (bitset_container_t *)malloc(sizeof(bitset_container_t)); +bitset_container_t *bitset_container_create(void); - if (!bitset) { - return NULL; - } - // sizeof(__m256i) == 32 - bitset->array = (uint64_t *)roaring_bitmap_aligned_malloc( - 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - if (!bitset->array) { - free(bitset); - return NULL; - } - bitset_container_clear(bitset); - return bitset; -} +/* Free memory. */ +void bitset_container_free(bitset_container_t *bitset); -/* Copy one container into another. We assume that they are distinct. */ -void bitset_container_copy(const bitset_container_t *source, - bitset_container_t *dest) { - dest->cardinality = source->cardinality; - memcpy(dest->array, source->array, - sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); -} +/* Clear bitset (sets bits to 0). */ +void bitset_container_clear(bitset_container_t *bitset); -void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min, - uint32_t max, uint16_t step) { - if (step == 0) return; // refuse to crash - if ((64 % step) == 0) { // step divides 64 - uint64_t mask = 0; // construct the repeated mask - for (uint32_t value = (min % step); value < 64; value += step) { - mask |= ((uint64_t)1 << value); - } - uint32_t firstword = min / 64; - uint32_t endword = (max - 1) / 64; - bitset->cardinality = (max - min + step - 1) / step; - if (firstword == endword) { - bitset->array[firstword] |= - mask & (((~UINT64_C(0)) << (min % 64)) & - ((~UINT64_C(0)) >> ((~max + 1) % 64))); - return; - } - bitset->array[firstword] = mask & ((~UINT64_C(0)) << (min % 64)); - for (uint32_t i = firstword + 1; i < endword; i++) - bitset->array[i] = mask; - bitset->array[endword] = mask & ((~UINT64_C(0)) >> ((~max + 1) % 64)); - } else { - for (uint32_t value = min; value < max; value += step) { - bitset_container_add(bitset, value); - } - } -} +/* Set all bits to 1. 
*/ +void bitset_container_set_all(bitset_container_t *bitset); -/* Free memory. */ -void bitset_container_free(bitset_container_t *bitset) { - if(bitset->array != NULL) {// Jon Strabala reports that some tools complain otherwise - roaring_bitmap_aligned_free(bitset->array); - bitset->array = NULL; // pedantic - } - free(bitset); -} +/* Duplicate bitset */ +bitset_container_t *bitset_container_clone(const bitset_container_t *src); -/* duplicate container. */ -bitset_container_t *bitset_container_clone(const bitset_container_t *src) { - bitset_container_t *bitset = - (bitset_container_t *)malloc(sizeof(bitset_container_t)); +/* Set the bit in [begin,end). WARNING: as of April 2016, this method is slow + * and + * should not be used in performance-sensitive code. Ever. */ +void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin, +uint32_t end); + +#if defined(CROARING_ASMBITMANIPOPTIMIZATION) && defined(__AVX2__) +/* Set the ith bit. */ +static inline void bitset_container_set(bitset_container_t *bitset, +uint16_t pos) { +uint64_t shift = 6; +uint64_t offset; +uint64_t p = pos; +ASM_SHIFT_RIGHT(p, shift, offset); +uint64_t load = bitset->words[offset]; +ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality); +bitset->words[offset] = load; +} + +/* Unset the ith bit. Currently unused. Could be used for optimization. 
*/ +/*static inline void bitset_container_unset(bitset_container_t *bitset, + uint16_t pos) { + uint64_t shift = 6; + uint64_t offset; + uint64_t p = pos; + ASM_SHIFT_RIGHT(p, shift, offset); + uint64_t load = bitset->words[offset]; + ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality); + bitset->words[offset] = load; +}*/ - if (!bitset) { - return NULL; - } - // sizeof(__m256i) == 32 - bitset->array = (uint64_t *)roaring_bitmap_aligned_malloc( - 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - if (!bitset->array) { - free(bitset); - return NULL; - } - bitset->cardinality = src->cardinality; - memcpy(bitset->array, src->array, - sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); - return bitset; +/* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower + * than bitset_container_set. */ +static inline bool bitset_container_add(bitset_container_t *bitset, +uint16_t pos) { +uint64_t shift = 6; +uint64_t offset; +uint64_t p = pos; +ASM_SHIFT_RIGHT(p, shift, offset); +uint64_t load = bitset->words[offset]; +// could be possibly slightly further optimized +const int32_t oldcard = bitset->cardinality; +ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality); +bitset->words[offset] = load; +return bitset->cardinality - oldcard; +} + +/* Remove `pos' from `bitset'. Returns true if `pos' was present. Might be + * slower than bitset_container_unset. */ +static inline bool bitset_container_remove(bitset_container_t *bitset, +uint16_t pos) { +uint64_t shift = 6; +uint64_t offset; +uint64_t p = pos; +ASM_SHIFT_RIGHT(p, shift, offset); +uint64_t load = bitset->words[offset]; +// could be possibly slightly further optimized +const int32_t oldcard = bitset->cardinality; +ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality); +bitset->words[offset] = load; +return oldcard - bitset->cardinality; +} + +/* Get the value of the ith bit. 
*/ +inline bool bitset_container_get(const bitset_container_t *bitset, +uint16_t pos) { +uint64_t word = bitset->words[pos >> 6]; +const uint64_t p = pos; +ASM_INPLACESHIFT_RIGHT(word, p); +return word & 1; } -void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin, - uint32_t end) { - bitset_set_range(bitset->array, begin, end); - bitset->cardinality = - bitset_container_compute_cardinality(bitset); // could be smarter -} +#else +/* Set the ith bit. */ +static inline void bitset_container_set(bitset_container_t *bitset, +uint16_t pos) { +const uint64_t old_word = bitset->words[pos >> 6]; +const int index = pos & 63; +const uint64_t new_word = old_word | (UINT64_C(1) << index); +bitset->cardinality += (uint32_t)((old_word ^ new_word) >> index); +bitset->words[pos >> 6] = new_word; +} + +/* Unset the ith bit. Currently unused. */ +/*static inline void bitset_container_unset(bitset_container_t *bitset, + uint16_t pos) { + const uint64_t old_word = bitset->words[pos >> 6]; + const int index = pos & 63; + const uint64_t new_word = old_word & (~(UINT64_C(1) << index)); + bitset->cardinality -= (uint32_t)((old_word ^ new_word) >> index); + bitset->words[pos >> 6] = new_word; +}*/ -bool bitset_container_intersect(const bitset_container_t *src_1, - const bitset_container_t *src_2) { - // could vectorize, but this is probably already quite fast in practice - const uint64_t * __restrict__ array_1 = src_1->array; - const uint64_t * __restrict__ array_2 = src_2->array; - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) { - if((array_1[i] & array_2[i]) != 0) return true; - } - return false; +/* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower + * than bitset_container_set. 
*/ +static inline bool bitset_container_add(bitset_container_t *bitset, +uint16_t pos) { +const uint64_t old_word = bitset->words[pos >> 6]; +const int index = pos & 63; +const uint64_t new_word = old_word | (UINT64_C(1) << index); +const uint64_t increment = (old_word ^ new_word) >> index; +bitset->cardinality += (uint32_t)increment; +bitset->words[pos >> 6] = new_word; +return increment > 0; +} + +/* Remove `pos' from `bitset'. Returns true if `pos' was present. Might be + * slower than bitset_container_unset. */ +static inline bool bitset_container_remove(bitset_container_t *bitset, +uint16_t pos) { +const uint64_t old_word = bitset->words[pos >> 6]; +const int index = pos & 63; +const uint64_t new_word = old_word & (~(UINT64_C(1) << index)); +const uint64_t increment = (old_word ^ new_word) >> index; +bitset->cardinality -= (uint32_t)increment; +bitset->words[pos >> 6] = new_word; +return increment > 0; +} + +/* Get the value of the ith bit. */ +inline bool bitset_container_get(const bitset_container_t *bitset, +uint16_t pos) { +const uint64_t word = bitset->words[pos >> 6]; +return (word >> (pos & 63)) & 1; } - -#ifdef USEAVX -#ifndef WORDS_IN_AVX2_REG -#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t) #endif -/* Get the number of bits set (force computation) */ -int bitset_container_compute_cardinality(const bitset_container_t *bitset) { - return (int) avx2_harley_seal_popcount256( - (const __m256i *)bitset->array, - BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); -} -#elif defined(USENEON) -int bitset_container_compute_cardinality(const bitset_container_t *bitset) { - uint16x8_t n0 = vdupq_n_u16(0); - uint16x8_t n1 = vdupq_n_u16(0); - uint16x8_t n2 = vdupq_n_u16(0); - uint16x8_t n3 = vdupq_n_u16(0); - for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { - uint64x2_t c0 = vld1q_u64(&bitset->array[i + 0]); - n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); - uint64x2_t c1 = vld1q_u64(&bitset->array[i + 2]); - n1 
= vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); - uint64x2_t c2 = vld1q_u64(&bitset->array[i + 4]); - n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); - uint64x2_t c3 = vld1q_u64(&bitset->array[i + 6]); - n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); - } - uint64x2_t n = vdupq_n_u64(0); - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); - return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); +/* +* Check if all bits are set in a range of positions from pos_start (included) to +* pos_end (excluded). +*/ +static inline bool bitset_container_get_range(const bitset_container_t *bitset, +uint32_t pos_start, uint32_t pos_end) { + +const uint32_t start = pos_start >> 6; +const uint32_t end = pos_end >> 6; + +const uint64_t first = ~((1ULL << (pos_start & 0x3F)) - 1); +const uint64_t last = (1ULL << (pos_end & 0x3F)) - 1; + +if (start == end) return ((bitset->words[end] & first & last) == (first & last)); +if ((bitset->words[start] & first) != first) return false; + +if ((end < BITSET_CONTAINER_SIZE_IN_WORDS) && ((bitset->words[end] & last) != last)){ + +return false; } -#else +for (uint16_t i = start + 1; (i < BITSET_CONTAINER_SIZE_IN_WORDS) && (i < end); ++i){ -/* Get the number of bits set (force computation) */ -int bitset_container_compute_cardinality(const bitset_container_t *bitset) { - const uint64_t *array = bitset->array; - int32_t sum = 0; - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) { - sum += hamming(array[i]); - sum += hamming(array[i + 1]); - sum += hamming(array[i + 2]); - sum += hamming(array[i + 3]); - } - return sum; +if (bitset->words[i] != UINT64_C(0xFFFFFFFFFFFFFFFF)) return false; } -#endif +return true; +} -#ifdef USEAVX +/* Check whether `bitset' is present in `array'. Calls bitset_container_get. 
*/ +inline bool bitset_container_contains(const bitset_container_t *bitset, +uint16_t pos) { +return bitset_container_get(bitset, pos); +} -#define BITSET_CONTAINER_FN_REPEAT 8 -#ifndef WORDS_IN_AVX2_REG +/* +* Check whether a range of bits from position `pos_start' (included) to `pos_end' (excluded) +* is present in `bitset'. Calls bitset_container_get_all. +*/ +static inline bool bitset_container_contains_range(const bitset_container_t *bitset, +uint32_t pos_start, uint32_t pos_end) { +return bitset_container_get_range(bitset, pos_start, pos_end); +} + +/* Get the number of bits set */ +ALLOW_UNALIGNED +static inline int bitset_container_cardinality( +const bitset_container_t *bitset) { +return bitset->cardinality; +} + + + + +/* Copy one container into another. We assume that they are distinct. */ +void bitset_container_copy(const bitset_container_t *source, +bitset_container_t *dest); + +/* Add all the values [min,max) at a distance k*step from min: min, + * min+step,.... */ +void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min, +uint32_t max, uint16_t step); + +/* Get the number of bits set (force computation). This does not modify bitset. + * To update the cardinality, you should do + * bitset->cardinality = bitset_container_compute_cardinality(bitset).*/ +int bitset_container_compute_cardinality(const bitset_container_t *bitset); + +/* Check whether this bitset is empty, + * it never modifies the bitset struct. 
*/ +static inline bool bitset_container_empty( +const bitset_container_t *bitset) { +if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) { +for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) { +if((bitset->words[i]) != 0) return false; +} +return true; +} +return bitset->cardinality == 0; +} + + +/* Get whether there is at least one bit set (see bitset_container_empty for the reverse), + the bitset is never modified */ +static inline bool bitset_container_const_nonzero_cardinality( +const bitset_container_t *bitset) { +return !bitset_container_empty(bitset); +} + +/* + * Check whether the two bitsets intersect + */ +bool bitset_container_intersect(const bitset_container_t *src_1, +const bitset_container_t *src_2); + +/* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the + * cardinality. */ +int bitset_container_or(const bitset_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); + +/* Computes the union of bitsets `src_1' and `src_2' and return the cardinality. + */ +int bitset_container_or_justcard(const bitset_container_t *src_1, +const bitset_container_t *src_2); + +/* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the + * cardinality. Same as bitset_container_or. */ +int bitset_container_union(const bitset_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); + +/* Computes the union of bitsets `src_1' and `src_2' and return the + * cardinality. Same as bitset_container_or_justcard. */ +int bitset_container_union_justcard(const bitset_container_t *src_1, +const bitset_container_t *src_2); + +/* Computes the union of bitsets `src_1' and `src_2' into `dst', but does + * not update the cardinality. Provided to optimize chained operations. 
*/ +int bitset_container_union_nocard(const bitset_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); + +/* Computes the union of bitsets `src_1' and `src_2' into `dst', but does not + * update the cardinality. Provided to optimize chained operations. */ +int bitset_container_or_nocard(const bitset_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); + +/* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and + * return the cardinality. */ +int bitset_container_and(const bitset_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); + +/* Computes the intersection of bitsets `src_1' and `src_2' and return the + * cardinality. */ +int bitset_container_and_justcard(const bitset_container_t *src_1, +const bitset_container_t *src_2); + +/* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and + * return the cardinality. Same as bitset_container_and. */ +int bitset_container_intersection(const bitset_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); + +/* Computes the intersection of bitsets `src_1' and `src_2' and return the + * cardinality. Same as bitset_container_and_justcard. */ +int bitset_container_intersection_justcard(const bitset_container_t *src_1, +const bitset_container_t *src_2); + +/* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does + * not update the cardinality. Provided to optimize chained operations. */ +int bitset_container_intersection_nocard(const bitset_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); + +/* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does + * not update the cardinality. Provided to optimize chained operations. 
*/ +int bitset_container_and_nocard(const bitset_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); + +/* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst' and + * return the cardinality. */ +int bitset_container_xor(const bitset_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); + +/* Computes the exclusive or of bitsets `src_1' and `src_2' and return the + * cardinality. */ +int bitset_container_xor_justcard(const bitset_container_t *src_1, +const bitset_container_t *src_2); + +/* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst', but does + * not update the cardinality. Provided to optimize chained operations. */ +int bitset_container_xor_nocard(const bitset_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); + +/* Computes the and not of bitsets `src_1' and `src_2' into `dst' and return the + * cardinality. */ +int bitset_container_andnot(const bitset_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); + +/* Computes the and not of bitsets `src_1' and `src_2' and return the + * cardinality. */ +int bitset_container_andnot_justcard(const bitset_container_t *src_1, +const bitset_container_t *src_2); + +/* Computes the and not or of bitsets `src_1' and `src_2' into `dst', but does + * not update the cardinality. Provided to optimize chained operations. */ +int bitset_container_andnot_nocard(const bitset_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); + +void bitset_container_offset(const bitset_container_t *c, +container_t **loc, container_t **hic, +uint16_t offset); +/* + * Write out the 16-bit integers contained in this container as a list of 32-bit + * integers using base + * as the starting value (it might be expected that base has zeros in its 16 + * least significant bits). + * The function returns the number of values written. 
+ * The caller is responsible for allocating enough memory in out. + * The out pointer should point to enough memory (the cardinality times 32 + * bits). + */ +int bitset_container_to_uint32_array(uint32_t *out, +const bitset_container_t *bc, +uint32_t base); + +/* + * Print this container using printf (useful for debugging). + */ +void bitset_container_printf(const bitset_container_t *v); + +/* + * Print this container using printf as a comma-separated list of 32-bit + * integers starting at base. + */ +void bitset_container_printf_as_uint32_array(const bitset_container_t *v, +uint32_t base); + +bool bitset_container_validate(const bitset_container_t *v, const char **reason); + +/** + * Return the serialized size in bytes of a container. + */ +static inline int32_t bitset_container_serialized_size_in_bytes(void) { +return BITSET_CONTAINER_SIZE_IN_WORDS * 8; +} + +/** + * Return the the number of runs. + */ +int bitset_container_number_of_runs(bitset_container_t *bc); + +bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, +roaring_iterator iterator, void *ptr); +bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, +roaring_iterator64 iterator, uint64_t high_bits, +void *ptr); + +/** + * Writes the underlying array to buf, outputs how many bytes were written. + * This is meant to be byte-by-byte compatible with the Java and Go versions of + * Roaring. + * The number of bytes written should be + * bitset_container_size_in_bytes(container). + */ +int32_t bitset_container_write(const bitset_container_t *container, char *buf); + +/** + * Reads the instance from buf, outputs how many bytes were read. + * This is meant to be byte-by-byte compatible with the Java and Go versions of + * Roaring. + * The number of bytes read should be bitset_container_size_in_bytes(container). + * You need to provide the (known) cardinality. 
+ */ +int32_t bitset_container_read(int32_t cardinality, +bitset_container_t *container, const char *buf); +/** + * Return the serialized size in bytes of a container (see + * bitset_container_write). + * This is meant to be compatible with the Java and Go versions of Roaring and + * assumes + * that the cardinality of the container is already known or can be computed. + */ +static inline int32_t bitset_container_size_in_bytes( +const bitset_container_t *container) { +(void)container; +return BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); +} + +/** + * Return true if the two containers have the same content. + */ +bool bitset_container_equals(const bitset_container_t *container1, +const bitset_container_t *container2); + +/** +* Return true if container1 is a subset of container2. +*/ +bool bitset_container_is_subset(const bitset_container_t *container1, +const bitset_container_t *container2); + +/** + * If the element of given rank is in this container, supposing that the first + * element has rank start_rank, then the function returns true and sets element + * accordingly. + * Otherwise, it returns false and update start_rank. 
+ */ +bool bitset_container_select(const bitset_container_t *container, +uint32_t *start_rank, uint32_t rank, +uint32_t *element); + +/* Returns the smallest value (assumes not empty) */ +uint16_t bitset_container_minimum(const bitset_container_t *container); + +/* Returns the largest value (assumes not empty) */ +uint16_t bitset_container_maximum(const bitset_container_t *container); + +/* Returns the number of values equal or smaller than x */ +int bitset_container_rank(const bitset_container_t *container, uint16_t x); + +/* Returns the index of x , if not exsist return -1 */ +int bitset_container_get_index(const bitset_container_t *container, uint16_t x); + +/* Returns the index of the first value equal or larger than x, or -1 */ +int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x); + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif + +#endif /* INCLUDE_CONTAINERS_BITSET_H_ */ +/* end file include/roaring/containers/bitset.h */ +/* begin file include/roaring/containers/run.h */ +/* + * run.h + * + */ + +#ifndef INCLUDE_CONTAINERS_RUN_H_ +#define INCLUDE_CONTAINERS_RUN_H_ + +#include +#include +#include +#include + + + +#ifdef __cplusplus +extern "C" { namespace roaring { + +// Note: in pure C++ code, you should avoid putting `using` in header files +using api::roaring_iterator; +using api::roaring_iterator64; + +namespace internal { +#endif + +/* struct rle16_s - run length pair + * + * @value: start position of the run + * @length: length of the run is `length + 1` + * + * An RLE pair {v, l} would represent the integers between the interval + * [v, v+l+1], e.g. {3, 2} = [3, 4, 5]. 
+ */ +struct rle16_s { +uint16_t value; +uint16_t length; +}; + +typedef struct rle16_s rle16_t; + +#ifdef __cplusplus +#define MAKE_RLE16(val,len) \ + {(uint16_t)(val), (uint16_t)(len)} // no tagged structs until c++20 +#else +#define MAKE_RLE16(val,len) \ + (rle16_t){.value = (uint16_t)(val), .length = (uint16_t)(len)} +#endif + +/* struct run_container_s - run container bitmap + * + * @n_runs: number of rle_t pairs in `runs`. + * @capacity: capacity in rle_t pairs `runs` can hold. + * @runs: pairs of rle_t. + */ +STRUCT_CONTAINER(run_container_s) { +int32_t n_runs; +int32_t capacity; +rle16_t *runs; +}; + +typedef struct run_container_s run_container_t; + +#define CAST_run(c) CAST(run_container_t *, c) // safer downcast +#define const_CAST_run(c) CAST(const run_container_t *, c) +#define movable_CAST_run(c) movable_CAST(run_container_t **, c) + +/* Create a new run container. Return NULL in case of failure. */ +run_container_t *run_container_create(void); + +/* Create a new run container with given capacity. Return NULL in case of + * failure. */ +run_container_t *run_container_create_given_capacity(int32_t size); + +/* + * Shrink the capacity to the actual size, return the number of bytes saved. + */ +int run_container_shrink_to_fit(run_container_t *src); + +/* Free memory owned by `run'. */ +void run_container_free(run_container_t *run); + +/* Duplicate container */ +run_container_t *run_container_clone(const run_container_t *src); + +/* + * Effectively deletes the value at index index, repacking data. 
+ */ +static inline void recoverRoomAtIndex(run_container_t *run, uint16_t index) { +memmove(run->runs + index, run->runs + (1 + index), +(run->n_runs - index - 1) * sizeof(rle16_t)); +run->n_runs--; +} + +/** + * Good old binary search through rle data + */ +inline int32_t interleavedBinarySearch(const rle16_t *array, int32_t lenarray, +uint16_t ikey) { +int32_t low = 0; +int32_t high = lenarray - 1; +while (low <= high) { +int32_t middleIndex = (low + high) >> 1; +uint16_t middleValue = array[middleIndex].value; +if (middleValue < ikey) { +low = middleIndex + 1; +} else if (middleValue > ikey) { +high = middleIndex - 1; +} else { +return middleIndex; +} +} +return -(low + 1); +} + +/* + * Returns index of the run which contains $ikey + */ +static inline int32_t rle16_find_run(const rle16_t *array, int32_t lenarray, +uint16_t ikey) { +int32_t low = 0; +int32_t high = lenarray - 1; +while (low <= high) { +int32_t middleIndex = (low + high) >> 1; +uint16_t min = array[middleIndex].value; +uint16_t max = array[middleIndex].value + array[middleIndex].length; +if (ikey > max) { +low = middleIndex + 1; +} else if (ikey < min) { +high = middleIndex - 1; +} else { +return middleIndex; +} +} +return -(low + 1); +} + + +/** + * Returns number of runs which can'be be merged with the key because they + * are less than the key. + * Note that [5,6,7,8] can be merged with the key 9 and won't be counted. 
+ */ +static inline int32_t rle16_count_less(const rle16_t* array, int32_t lenarray, +uint16_t key) { +if (lenarray == 0) return 0; +int32_t low = 0; +int32_t high = lenarray - 1; +while (low <= high) { +int32_t middleIndex = (low + high) >> 1; +uint16_t min_value = array[middleIndex].value; +uint16_t max_value = array[middleIndex].value + array[middleIndex].length; +if (max_value + UINT32_C(1) < key) { // uint32 arithmetic +low = middleIndex + 1; +} else if (key < min_value) { +high = middleIndex - 1; +} else { +return middleIndex; +} +} +return low; +} + +static inline int32_t rle16_count_greater(const rle16_t* array, int32_t lenarray, +uint16_t key) { +if (lenarray == 0) return 0; +int32_t low = 0; +int32_t high = lenarray - 1; +while (low <= high) { +int32_t middleIndex = (low + high) >> 1; +uint16_t min_value = array[middleIndex].value; +uint16_t max_value = array[middleIndex].value + array[middleIndex].length; +if (max_value < key) { +low = middleIndex + 1; +} else if (key + UINT32_C(1) < min_value) { // uint32 arithmetic +high = middleIndex - 1; +} else { +return lenarray - (middleIndex + 1); +} +} +return lenarray - low; +} + +/** + * increase capacity to at least min. Whether the + * existing data needs to be copied over depends on copy. If "copy" is false, + * then the new content will be uninitialized, otherwise a copy is made. + */ +void run_container_grow(run_container_t *run, int32_t min, bool copy); + +/** + * Moves the data so that we can write data at index + */ +static inline void makeRoomAtIndex(run_container_t *run, uint16_t index) { +/* This function calls realloc + memmove sequentially to move by one index. + * Potentially copying twice the array. + */ +if (run->n_runs + 1 > run->capacity) +run_container_grow(run, run->n_runs + 1, true); +memmove(run->runs + 1 + index, run->runs + index, +(run->n_runs - index) * sizeof(rle16_t)); +run->n_runs++; +} + +/* Add `pos' to `run'. Returns true if `pos' was not present. 
*/ +bool run_container_add(run_container_t *run, uint16_t pos); + +/* Remove `pos' from `run'. Returns true if `pos' was present. */ +static inline bool run_container_remove(run_container_t *run, uint16_t pos) { +int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); +if (index >= 0) { +int32_t le = run->runs[index].length; +if (le == 0) { +recoverRoomAtIndex(run, (uint16_t)index); +} else { +run->runs[index].value++; +run->runs[index].length--; +} +return true; +} +index = -index - 2; // points to preceding value, possibly -1 +if (index >= 0) { // possible match +int32_t offset = pos - run->runs[index].value; +int32_t le = run->runs[index].length; +if (offset < le) { +// need to break in two +run->runs[index].length = (uint16_t)(offset - 1); +// need to insert +uint16_t newvalue = pos + 1; +int32_t newlength = le - offset - 1; +makeRoomAtIndex(run, (uint16_t)(index + 1)); +run->runs[index + 1].value = newvalue; +run->runs[index + 1].length = (uint16_t)newlength; +return true; + +} else if (offset == le) { +run->runs[index].length--; +return true; +} +} +// no match +return false; +} + +/* Check whether `pos' is present in `run'. */ +inline bool run_container_contains(const run_container_t *run, uint16_t pos) { +int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); +if (index >= 0) return true; +index = -index - 2; // points to preceding value, possibly -1 +if (index != -1) { // possible match +int32_t offset = pos - run->runs[index].value; +int32_t le = run->runs[index].length; +if (offset <= le) return true; +} +return false; +} + +/* +* Check whether all positions in a range of positions from pos_start (included) +* to pos_end (excluded) is present in `run'. 
+*/ +static inline bool run_container_contains_range(const run_container_t *run, +uint32_t pos_start, uint32_t pos_end) { +uint32_t count = 0; +int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos_start); +if (index < 0) { +index = -index - 2; +if ((index == -1) || ((pos_start - run->runs[index].value) > run->runs[index].length)){ +return false; +} +} +for (int32_t i = index; i < run->n_runs; ++i) { +const uint32_t stop = run->runs[i].value + run->runs[i].length; +if (run->runs[i].value >= pos_end) break; +if (stop >= pos_end) { +count += (((pos_end - run->runs[i].value) > 0) ? (pos_end - run->runs[i].value) : 0); +break; +} +const uint32_t min = (stop - pos_start) > 0 ? (stop - pos_start) : 0; +count += (min < run->runs[i].length) ? min : run->runs[i].length; +} +return count >= (pos_end - pos_start - 1); +} + +/* Get the cardinality of `run'. Requires an actual computation. */ +int run_container_cardinality(const run_container_t *run); + +/* Card > 0?, see run_container_empty for the reverse */ +static inline bool run_container_nonzero_cardinality( +const run_container_t *run) { +return run->n_runs > 0; // runs never empty +} + +/* Card == 0?, see run_container_nonzero_cardinality for the reverse */ +static inline bool run_container_empty( +const run_container_t *run) { +return run->n_runs == 0; // runs never empty +} + + + +/* Copy one container into another. We assume that they are distinct. */ +void run_container_copy(const run_container_t *src, run_container_t *dst); + +/** + * Append run described by vl to the run container, possibly merging. + * It is assumed that the run would be inserted at the end of the container, no + * check is made. + * It is assumed that the run container has the necessary capacity: caller is + * responsible for checking memory capacity. + * + * + * This is not a safe function, it is meant for performance: use with care. 
+ */ +static inline void run_container_append(run_container_t *run, rle16_t vl, +rle16_t *previousrl) { +const uint32_t previousend = previousrl->value + previousrl->length; +if (vl.value > previousend + 1) { // we add a new one +run->runs[run->n_runs] = vl; +run->n_runs++; +*previousrl = vl; +} else { +uint32_t newend = vl.value + vl.length + UINT32_C(1); +if (newend > previousend) { // we merge +previousrl->length = (uint16_t)(newend - 1 - previousrl->value); +run->runs[run->n_runs - 1] = *previousrl; +} +} +} + +/** + * Like run_container_append but it is assumed that the content of run is empty. + */ +static inline rle16_t run_container_append_first(run_container_t *run, +rle16_t vl) { +run->runs[run->n_runs] = vl; +run->n_runs++; +return vl; +} + +/** + * append a single value given by val to the run container, possibly merging. + * It is assumed that the value would be inserted at the end of the container, + * no check is made. + * It is assumed that the run container has the necessary capacity: caller is + * responsible for checking memory capacity. + * + * This is not a safe function, it is meant for performance: use with care. + */ +static inline void run_container_append_value(run_container_t *run, +uint16_t val, +rle16_t *previousrl) { +const uint32_t previousend = previousrl->value + previousrl->length; +if (val > previousend + 1) { // we add a new one +*previousrl = MAKE_RLE16(val, 0); +run->runs[run->n_runs] = *previousrl; +run->n_runs++; +} else if (val == previousend + 1) { // we merge +previousrl->length++; +run->runs[run->n_runs - 1] = *previousrl; +} +} + +/** + * Like run_container_append_value but it is assumed that the content of run is + * empty. + */ +static inline rle16_t run_container_append_value_first(run_container_t *run, +uint16_t val) { +rle16_t newrle = MAKE_RLE16(val, 0); +run->runs[run->n_runs] = newrle; +run->n_runs++; +return newrle; +} + +/* Check whether the container spans the whole chunk (cardinality = 1<<16). 
+ * This check can be done in constant time (inexpensive). */ +static inline bool run_container_is_full(const run_container_t *run) { +rle16_t vl = run->runs[0]; +return (run->n_runs == 1) && (vl.value == 0) && (vl.length == 0xFFFF); +} + +/* Compute the union of `src_1' and `src_2' and write the result to `dst' + * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ +void run_container_union(const run_container_t *src_1, +const run_container_t *src_2, run_container_t *dst); + +/* Compute the union of `src_1' and `src_2' and write the result to `src_1' */ +void run_container_union_inplace(run_container_t *src_1, +const run_container_t *src_2); + +/* Compute the intersection of src_1 and src_2 and write the result to + * dst. It is assumed that dst is distinct from both src_1 and src_2. */ +void run_container_intersection(const run_container_t *src_1, +const run_container_t *src_2, +run_container_t *dst); + +/* Compute the size of the intersection of src_1 and src_2 . */ +int run_container_intersection_cardinality(const run_container_t *src_1, +const run_container_t *src_2); + +/* Check whether src_1 and src_2 intersect. */ +bool run_container_intersect(const run_container_t *src_1, +const run_container_t *src_2); + +/* Compute the symmetric difference of `src_1' and `src_2' and write the result + * to `dst' + * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ +void run_container_xor(const run_container_t *src_1, +const run_container_t *src_2, run_container_t *dst); + +/* + * Write out the 16-bit integers contained in this container as a list of 32-bit + * integers using base + * as the starting value (it might be expected that base has zeros in its 16 + * least significant bits). + * The function returns the number of values written. + * The caller is responsible for allocating enough memory in out. 
+ */ +int run_container_to_uint32_array(void *vout, const run_container_t *cont, +uint32_t base); + +/* + * Print this container using printf (useful for debugging). + */ +void run_container_printf(const run_container_t *v); + +/* + * Print this container using printf as a comma-separated list of 32-bit + * integers starting at base. + */ +void run_container_printf_as_uint32_array(const run_container_t *v, +uint32_t base); + +bool run_container_validate(const run_container_t *run, const char **reason); + +/** + * Return the serialized size in bytes of a container having "num_runs" runs. + */ +static inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs) { +return sizeof(uint16_t) + +sizeof(rle16_t) * num_runs; // each run requires 2 2-byte entries. +} + +bool run_container_iterate(const run_container_t *cont, uint32_t base, +roaring_iterator iterator, void *ptr); +bool run_container_iterate64(const run_container_t *cont, uint32_t base, +roaring_iterator64 iterator, uint64_t high_bits, +void *ptr); + +/** + * Writes the underlying array to buf, outputs how many bytes were written. + * This is meant to be byte-by-byte compatible with the Java and Go versions of + * Roaring. + * The number of bytes written should be run_container_size_in_bytes(container). + */ +int32_t run_container_write(const run_container_t *container, char *buf); + +/** + * Reads the instance from buf, outputs how many bytes were read. + * This is meant to be byte-by-byte compatible with the Java and Go versions of + * Roaring. + * The number of bytes read should be bitset_container_size_in_bytes(container). + * The cardinality parameter is provided for consistency with other containers, + * but + * it might be effectively ignored.. + */ +int32_t run_container_read(int32_t cardinality, run_container_t *container, +const char *buf); + +/** + * Return the serialized size in bytes of a container (see run_container_write). 
+ * This is meant to be compatible with the Java and Go versions of Roaring.
+ */
+static inline int32_t run_container_size_in_bytes(
+const run_container_t *container) {
+return run_container_serialized_size_in_bytes(container->n_runs);
+}
+
+/**
+ * Return true if the two containers have the same content.
+ */
+ALLOW_UNALIGNED
+static inline bool run_container_equals(const run_container_t *container1,
+const run_container_t *container2) {
+if (container1->n_runs != container2->n_runs) {
+return false;
+}
+return memequals(container1->runs, container2->runs,
+container1->n_runs * sizeof(rle16_t));
+}
+
+/**
+* Return true if container1 is a subset of container2.
+*/
+bool run_container_is_subset(const run_container_t *container1,
+const run_container_t *container2);
+
+/**
+ * Used in a start-finish scan that appends segments, for XOR and NOT
+ */
+
+void run_container_smart_append_exclusive(run_container_t *src,
+const uint16_t start,
+const uint16_t length);
+
+/**
+* The new container consists of a single run [start,stop).
+* It is required that stop>start, the caller is responsible for this check.
+* It is required that stop <= (1<<16), the caller is responsible for this check.
+* The cardinality of the created container is stop - start.
+* Returns NULL on failure
+*/
+static inline run_container_t *run_container_create_range(uint32_t start,
+uint32_t stop) {
+run_container_t *rc = run_container_create_given_capacity(1);
+if (rc) {
+rle16_t r;
+r.value = (uint16_t)start;
+r.length = (uint16_t)(stop - start - 1);
+run_container_append_first(rc, r);
+}
+return rc;
+}
+
+/**
+ * If the element of given rank is in this container, supposing that the first
+ * element has rank start_rank, then the function returns true and sets element
+ * accordingly.
+ * Otherwise, it returns false and updates start_rank.
+ */
+bool run_container_select(const run_container_t *container,
+uint32_t *start_rank, uint32_t rank,
+uint32_t *element);
+
+/* Compute the difference of src_1 and src_2 and write the result to
+ * dst. It is assumed that dst is distinct from both src_1 and src_2. */
+
+void run_container_andnot(const run_container_t *src_1,
+const run_container_t *src_2, run_container_t *dst);
+
+void run_container_offset(const run_container_t *c,
+container_t **loc, container_t **hic,
+uint16_t offset);
+
+/* Returns the smallest value (assumes not empty) */
+inline uint16_t run_container_minimum(const run_container_t *run) {
+if (run->n_runs == 0) return 0;
+return run->runs[0].value;
+}
+
+/* Returns the largest value (assumes not empty) */
+inline uint16_t run_container_maximum(const run_container_t *run) {
+if (run->n_runs == 0) return 0;
+return run->runs[run->n_runs - 1].value + run->runs[run->n_runs - 1].length;
+}
+
+/* Returns the number of values equal or smaller than x */
+int run_container_rank(const run_container_t *arr, uint16_t x);
+
+/* Returns the index of x; if it does not exist, returns -1 */
+int run_container_get_index(const run_container_t *arr, uint16_t x);
+
+/* Returns the index of the first run containing a value at least as large as x, or -1 */
+inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x) {
+int32_t index = interleavedBinarySearch(arr->runs, arr->n_runs, x);
+if (index >= 0) return index;
+index = -index - 2; // points to preceding run, possibly -1
+if (index != -1) { // possible match
+int32_t offset = x - arr->runs[index].value;
+int32_t le = arr->runs[index].length;
+if (offset <= le) return index;
+}
+index += 1;
+if(index < arr->n_runs) {
+return index;
+}
+return -1;
+}
+
+/*
+ * Add all values in range [min, max] using hint.
+ */ +static inline void run_container_add_range_nruns(run_container_t* run, +uint32_t min, uint32_t max, +int32_t nruns_less, +int32_t nruns_greater) { +int32_t nruns_common = run->n_runs - nruns_less - nruns_greater; +if (nruns_common == 0) { +makeRoomAtIndex(run, nruns_less); +run->runs[nruns_less].value = min; +run->runs[nruns_less].length = max - min; +} else { +uint32_t common_min = run->runs[nruns_less].value; +uint32_t common_max = run->runs[nruns_less + nruns_common - 1].value + +run->runs[nruns_less + nruns_common - 1].length; +uint32_t result_min = (common_min < min) ? common_min : min; +uint32_t result_max = (common_max > max) ? common_max : max; + +run->runs[nruns_less].value = result_min; +run->runs[nruns_less].length = result_max - result_min; + +memmove(&(run->runs[nruns_less + 1]), +&(run->runs[run->n_runs - nruns_greater]), +nruns_greater*sizeof(rle16_t)); +run->n_runs = nruns_less + 1 + nruns_greater; +} +} + +/** + * Add all values in range [min, max]. This function is currently unused + * and left as documentation. 
+ */ +/*static inline void run_container_add_range(run_container_t* run, + uint32_t min, uint32_t max) { + int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max); + int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min); + run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater); +}*/ + +/** + * Shifts last $count elements either left (distance < 0) or right (distance > 0) + */ +static inline void run_container_shift_tail(run_container_t* run, +int32_t count, int32_t distance) { +if (distance > 0) { +if (run->capacity < count+distance) { +run_container_grow(run, count+distance, true); +} +} +int32_t srcpos = run->n_runs - count; +int32_t dstpos = srcpos + distance; +memmove(&(run->runs[dstpos]), &(run->runs[srcpos]), sizeof(rle16_t) * count); +run->n_runs += distance; +} + +/** + * Remove all elements in range [min, max] + */ +static inline void run_container_remove_range(run_container_t *run, uint32_t min, uint32_t max) { +int32_t first = rle16_find_run(run->runs, run->n_runs, min); +int32_t last = rle16_find_run(run->runs, run->n_runs, max); + +if (first >= 0 && min > run->runs[first].value && +max < ((uint32_t)run->runs[first].value + (uint32_t)run->runs[first].length)) { +// split this run into two adjacent runs + +// right subinterval +makeRoomAtIndex(run, first+1); +run->runs[first+1].value = max + 1; +run->runs[first+1].length = (run->runs[first].value + run->runs[first].length) - (max + 1); + +// left subinterval +run->runs[first].length = (min - 1) - run->runs[first].value; + +return; +} + +// update left-most partial run +if (first >= 0) { +if (min > run->runs[first].value) { +run->runs[first].length = (min - 1) - run->runs[first].value; +first++; +} +} else { +first = -first-1; +} + +// update right-most run +if (last >= 0) { +uint16_t run_max = run->runs[last].value + run->runs[last].length; +if (run_max > max) { +run->runs[last].value = max + 1; +run->runs[last].length = run_max - (max + 1); 
+last--; +} +} else { +last = (-last-1) - 1; +} + +// remove intermediate runs +if (first <= last) { +run_container_shift_tail(run, run->n_runs - (last+1), -(last-first+1)); +} +} + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif + +#endif /* INCLUDE_CONTAINERS_RUN_H_ */ +/* end file include/roaring/containers/run.h */ +/* begin file include/roaring/containers/convert.h */ +/* + * convert.h + * + */ + +#ifndef INCLUDE_CONTAINERS_CONVERT_H_ +#define INCLUDE_CONTAINERS_CONVERT_H_ + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +/* Convert an array into a bitset. The input container is not freed or modified. + */ +bitset_container_t *bitset_container_from_array(const array_container_t *arr); + +/* Convert a run into a bitset. The input container is not freed or modified. */ +bitset_container_t *bitset_container_from_run(const run_container_t *arr); + +/* Convert a run into an array. The input container is not freed or modified. */ +array_container_t *array_container_from_run(const run_container_t *arr); + +/* Convert a bitset into an array. The input container is not freed or modified. + */ +array_container_t *array_container_from_bitset(const bitset_container_t *bits); + +/* Convert an array into a run. The input container is not freed or modified. + */ +run_container_t *run_container_from_array(const array_container_t *c); + +/* convert a run into either an array or a bitset + * might free the container. This does not free the input run container. */ +container_t *convert_to_bitset_or_array_container( +run_container_t *rc, int32_t card, +uint8_t *resulttype); + +/* convert containers to and from runcontainers, as is most space efficient. + * The container might be freed. */ +container_t *convert_run_optimize( +container_t *c, uint8_t typecode_original, +uint8_t *typecode_after); + +/* converts a run container to either an array or a bitset, IF it saves space. 
+ */
+/* If a conversion occurs, the caller is responsible to free the original
+ * container and
+ * they become responsible to free the new one. */
+container_t *convert_run_to_efficient_container(
+run_container_t *c, uint8_t *typecode_after);
+
+// like convert_run_to_efficient_container but frees the old result if needed
+container_t *convert_run_to_efficient_container_and_free(
+run_container_t *c, uint8_t *typecode_after);
+
+/**
+ * Create new container which is a union of run container and
+ * range [min, max]. Caller is responsible for freeing run container.
+ */
+container_t *container_from_run_range(
+const run_container_t *run,
+uint32_t min, uint32_t max,
+uint8_t *typecode_after);
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring { namespace internal {
+#endif
+
+#endif /* INCLUDE_CONTAINERS_CONVERT_H_ */
+/* end file include/roaring/containers/convert.h */
+/* begin file include/roaring/containers/mixed_equal.h */
+/*
+ * mixed_equal.h
+ *
+ */
+
+#ifndef CONTAINERS_MIXED_EQUAL_H_
+#define CONTAINERS_MIXED_EQUAL_H_
+
+
+#ifdef __cplusplus
+extern "C" { namespace roaring { namespace internal {
+#endif
+
+/**
+ * Return true if the two containers have the same content.
+ */
+bool array_container_equal_bitset(const array_container_t* container1,
+const bitset_container_t* container2);
+
+/**
+ * Return true if the two containers have the same content.
+ */
+bool run_container_equals_array(const run_container_t* container1,
+const array_container_t* container2);
+/**
+ * Return true if the two containers have the same content.
+ */ +bool run_container_equals_bitset(const run_container_t* container1, +const bitset_container_t* container2); + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif + +#endif /* CONTAINERS_MIXED_EQUAL_H_ */ +/* end file include/roaring/containers/mixed_equal.h */ +/* begin file include/roaring/containers/mixed_subset.h */ +/* + * mixed_subset.h + * + */ + +#ifndef CONTAINERS_MIXED_SUBSET_H_ +#define CONTAINERS_MIXED_SUBSET_H_ + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +/** + * Return true if container1 is a subset of container2. + */ +bool array_container_is_subset_bitset(const array_container_t* container1, +const bitset_container_t* container2); + +/** +* Return true if container1 is a subset of container2. + */ +bool run_container_is_subset_array(const run_container_t* container1, +const array_container_t* container2); + +/** +* Return true if container1 is a subset of container2. + */ +bool array_container_is_subset_run(const array_container_t* container1, +const run_container_t* container2); + +/** +* Return true if container1 is a subset of container2. + */ +bool run_container_is_subset_bitset(const run_container_t* container1, +const bitset_container_t* container2); + +/** +* Return true if container1 is a subset of container2. 
+*/ +bool bitset_container_is_subset_run(const bitset_container_t* container1, +const run_container_t* container2); + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif + +#endif /* CONTAINERS_MIXED_SUBSET_H_ */ +/* end file include/roaring/containers/mixed_subset.h */ +/* begin file include/roaring/containers/mixed_andnot.h */ +/* + * mixed_andnot.h + */ +#ifndef INCLUDE_CONTAINERS_MIXED_ANDNOT_H_ +#define INCLUDE_CONTAINERS_MIXED_ANDNOT_H_ + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst, a valid array container that could be the same as dst.*/ +void array_bitset_container_andnot(const array_container_t *src_1, +const bitset_container_t *src_2, +array_container_t *dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * src_1 */ + +void array_bitset_container_iandnot(array_container_t *src_1, +const bitset_container_t *src_2); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst, which does not initially have a valid container. + * Return true for a bitset result; false for array + */ + +bool bitset_array_container_andnot( +const bitset_container_t *src_1, const array_container_t *src_2, +container_t **dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. + * Returns true iff dst is a bitset */ + +bool bitset_array_container_iandnot( +bitset_container_t *src_1, const array_container_t *src_2, +container_t **dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst. Result may be either a bitset or an array container + * (returns "result is bitset"). 
dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. + */ + +bool run_bitset_container_andnot( +const run_container_t *src_1, const bitset_container_t *src_2, +container_t **dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst. Result may be either a bitset or an array container + * (returns "result is bitset"). dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. + */ + +bool run_bitset_container_iandnot( +run_container_t *src_1, const bitset_container_t *src_2, +container_t **dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst. Result may be either a bitset or an array container + * (returns "result is bitset"). dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. + */ + +bool bitset_run_container_andnot( +const bitset_container_t *src_1, const run_container_t *src_2, +container_t **dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. + * Returns true iff dst is a bitset */ + +bool bitset_run_container_iandnot( +bitset_container_t *src_1, const run_container_t *src_2, +container_t **dst); + +/* dst does not indicate a valid container initially. Eventually it + * can become any type of container. + */ + +int run_array_container_andnot( +const run_container_t *src_1, const array_container_t *src_2, +container_t **dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. 
Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. + * Returns true iff dst is a bitset */ + +int run_array_container_iandnot( +run_container_t *src_1, const array_container_t *src_2, +container_t **dst); + +/* dst must be a valid array container, allowed to be src_1 */ + +void array_run_container_andnot(const array_container_t *src_1, +const run_container_t *src_2, +array_container_t *dst); + +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. + */ + +void array_run_container_iandnot(array_container_t *src_1, +const run_container_t *src_2); + +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. + */ + +int run_run_container_andnot( +const run_container_t *src_1, const run_container_t *src_2, +container_t **dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. + * Returns true iff dst is a bitset */ + +int run_run_container_iandnot( +run_container_t *src_1, const run_container_t *src_2, +container_t **dst); + +/* + * dst is a valid array container and may be the same as src_1 + */ + +void array_array_container_andnot(const array_container_t *src_1, +const array_container_t *src_2, +array_container_t *dst); + +/* inplace array-array andnot will always be able to reuse the space of + * src_1 */ +void array_array_container_iandnot(array_container_t *src_1, +const array_container_t *src_2); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). 
Return value is + * "dst is a bitset" + */ + +bool bitset_bitset_container_andnot( +const bitset_container_t *src_1, const bitset_container_t *src_2, +container_t **dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. + * Returns true iff dst is a bitset */ + +bool bitset_bitset_container_iandnot( +bitset_container_t *src_1, const bitset_container_t *src_2, +container_t **dst); + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif + +#endif +/* end file include/roaring/containers/mixed_andnot.h */ +/* begin file include/roaring/containers/mixed_intersection.h */ +/* + * mixed_intersection.h + * + */ + +#ifndef INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ +#define INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ + +/* These functions appear to exclude cases where the + * inputs have the same type and the output is guaranteed + * to have the same type as the inputs. Eg, array intersection + */ + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +/* Compute the intersection of src_1 and src_2 and write the result to + * dst. It is allowed for dst to be equal to src_1. We assume that dst is a + * valid container. */ +void array_bitset_container_intersection(const array_container_t *src_1, +const bitset_container_t *src_2, +array_container_t *dst); + +/* Compute the size of the intersection of src_1 and src_2. */ +int array_bitset_container_intersection_cardinality( +const array_container_t *src_1, const bitset_container_t *src_2); + + + +/* Checking whether src_1 and src_2 intersect. 
*/ +bool array_bitset_container_intersect(const array_container_t *src_1, +const bitset_container_t *src_2); + +/* + * Compute the intersection between src_1 and src_2 and write the result + * to *dst. If the return function is true, the result is a bitset_container_t + * otherwise is a array_container_t. We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. + */ +bool bitset_bitset_container_intersection(const bitset_container_t *src_1, +const bitset_container_t *src_2, +container_t **dst); + +/* Compute the intersection between src_1 and src_2 and write the result to + * dst. It is allowed for dst to be equal to src_1. We assume that dst is a + * valid container. */ +void array_run_container_intersection(const array_container_t *src_1, +const run_container_t *src_2, +array_container_t *dst); + +/* Compute the intersection between src_1 and src_2 and write the result to + * *dst. If the result is true then the result is a bitset_container_t + * otherwise is a array_container_t. + * If *dst == src_2, then an in-place intersection is attempted + **/ +bool run_bitset_container_intersection(const run_container_t *src_1, +const bitset_container_t *src_2, +container_t **dst); + +/* Compute the size of the intersection between src_1 and src_2 . */ +int array_run_container_intersection_cardinality(const array_container_t *src_1, +const run_container_t *src_2); + +/* Compute the size of the intersection between src_1 and src_2 + **/ +int run_bitset_container_intersection_cardinality(const run_container_t *src_1, +const bitset_container_t *src_2); + + +/* Check that src_1 and src_2 intersect. */ +bool array_run_container_intersect(const array_container_t *src_1, +const run_container_t *src_2); + +/* Check that src_1 and src_2 intersect. 
+ **/ +bool run_bitset_container_intersect(const run_container_t *src_1, +const bitset_container_t *src_2); + +/* + * Same as bitset_bitset_container_intersection except that if the output is to + * be a + * bitset_container_t, then src_1 is modified and no allocation is made. + * If the output is to be an array_container_t, then caller is responsible + * to free the container. + * In all cases, the result is in *dst. + */ +bool bitset_bitset_container_intersection_inplace( +bitset_container_t *src_1, const bitset_container_t *src_2, +container_t **dst); + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif + +#endif /* INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ */ +/* end file include/roaring/containers/mixed_intersection.h */ +/* begin file include/roaring/containers/mixed_negation.h */ +/* + * mixed_negation.h + * + */ + +#ifndef INCLUDE_CONTAINERS_MIXED_NEGATION_H_ +#define INCLUDE_CONTAINERS_MIXED_NEGATION_H_ + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +/* Negation across the entire range of the container. + * Compute the negation of src and write the result + * to *dst. The complement of a + * sufficiently sparse set will always be dense and a hence a bitmap + * We assume that dst is pre-allocated and a valid bitset container + * There can be no in-place version. + */ +void array_container_negation(const array_container_t *src, +bitset_container_t *dst); + +/* Negation across the entire range of the container + * Compute the negation of src and write the result + * to *dst. A true return value indicates a bitset result, + * otherwise the result is an array container. + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. 
+ */ +bool bitset_container_negation( +const bitset_container_t *src, +container_t **dst); + +/* inplace version */ +/* + * Same as bitset_container_negation except that if the output is to + * be a + * bitset_container_t, then src is modified and no allocation is made. + * If the output is to be an array_container_t, then caller is responsible + * to free the container. + * In all cases, the result is in *dst. + */ +bool bitset_container_negation_inplace( +bitset_container_t *src, +container_t **dst); + +/* Negation across the entire range of container + * Compute the negation of src and write the result + * to *dst. + * Return values are the *_TYPECODES as defined * in containers.h + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. + */ +int run_container_negation(const run_container_t *src, container_t **dst); + +/* + * Same as run_container_negation except that if the output is to + * be a + * run_container_t, and has the capacity to hold the result, + * then src is modified and no allocation is made. + * In all cases, the result is in *dst. + */ +int run_container_negation_inplace(run_container_t *src, container_t **dst); + +/* Negation across a range of the container. + * Compute the negation of src and write the result + * to *dst. Returns true if the result is a bitset container + * and false for an array container. *dst is not preallocated. + */ +bool array_container_negation_range( +const array_container_t *src, +const int range_start, const int range_end, +container_t **dst); + +/* Even when the result would fit, it is unclear how to make an + * inplace version without inefficient copying. Thus this routine + * may be a wrapper for the non-in-place version + */ +bool array_container_negation_range_inplace( +array_container_t *src, +const int range_start, const int range_end, +container_t **dst); + +/* Negation across a range of the container + * Compute the negation of src and write the result + * to *dst. 
A true return value indicates a bitset result, + * otherwise the result is an array container. + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. + */ +bool bitset_container_negation_range( +const bitset_container_t *src, +const int range_start, const int range_end, +container_t **dst); + +/* inplace version */ +/* + * Same as bitset_container_negation except that if the output is to + * be a + * bitset_container_t, then src is modified and no allocation is made. + * If the output is to be an array_container_t, then caller is responsible + * to free the container. + * In all cases, the result is in *dst. + */ +bool bitset_container_negation_range_inplace( +bitset_container_t *src, +const int range_start, const int range_end, +container_t **dst); + +/* Negation across a range of container + * Compute the negation of src and write the result + * to *dst. Return values are the *_TYPECODES as defined * in containers.h + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. + */ +int run_container_negation_range( +const run_container_t *src, +const int range_start, const int range_end, +container_t **dst); + +/* + * Same as run_container_negation except that if the output is to + * be a + * run_container_t, and has the capacity to hold the result, + * then src is modified and no allocation is made. + * In all cases, the result is in *dst. 
+ */ +int run_container_negation_range_inplace( +run_container_t *src, +const int range_start, const int range_end, +container_t **dst); + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif + +#endif /* INCLUDE_CONTAINERS_MIXED_NEGATION_H_ */ +/* end file include/roaring/containers/mixed_negation.h */ +/* begin file include/roaring/containers/mixed_union.h */ +/* + * mixed_intersection.h + * + */ + +#ifndef INCLUDE_CONTAINERS_MIXED_UNION_H_ +#define INCLUDE_CONTAINERS_MIXED_UNION_H_ + +/* These functions appear to exclude cases where the + * inputs have the same type and the output is guaranteed + * to have the same type as the inputs. Eg, bitset unions + */ + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +/* Compute the union of src_1 and src_2 and write the result to + * dst. It is allowed for src_2 to be dst. */ +void array_bitset_container_union(const array_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); + +/* Compute the union of src_1 and src_2 and write the result to + * dst. It is allowed for src_2 to be dst. This version does not + * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */ +void array_bitset_container_lazy_union(const array_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); + +/* + * Compute the union between src_1 and src_2 and write the result + * to *dst. If the return function is true, the result is a bitset_container_t + * otherwise is a array_container_t. We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. + */ +bool array_array_container_union( +const array_container_t *src_1, const array_container_t *src_2, +container_t **dst); + +/* + * Compute the union between src_1 and src_2 and write the result + * to *dst if it cannot be written to src_1. 
If the return function is true, + * the result is a bitset_container_t + * otherwise is a array_container_t. When the result is an array_container_t, it + * it either written to src_1 (if *dst is null) or to *dst. + * If the result is a bitset_container_t and *dst is null, then there was a failure. + */ +bool array_array_container_inplace_union( +array_container_t *src_1, const array_container_t *src_2, +container_t **dst); + +/* + * Same as array_array_container_union except that it will more eagerly produce + * a bitset. + */ +bool array_array_container_lazy_union( +const array_container_t *src_1, const array_container_t *src_2, +container_t **dst); + +/* + * Same as array_array_container_inplace_union except that it will more eagerly produce + * a bitset. + */ +bool array_array_container_lazy_inplace_union( +array_container_t *src_1, const array_container_t *src_2, +container_t **dst); + +/* Compute the union of src_1 and src_2 and write the result to + * dst. We assume that dst is a + * valid container. The result might need to be further converted to array or + * bitset container, + * the caller is responsible for the eventual conversion. */ +void array_run_container_union(const array_container_t *src_1, +const run_container_t *src_2, +run_container_t *dst); + +/* Compute the union of src_1 and src_2 and write the result to + * src2. The result might need to be further converted to array or + * bitset container, + * the caller is responsible for the eventual conversion. */ +void array_run_container_inplace_union(const array_container_t *src_1, +run_container_t *src_2); + +/* Compute the union of src_1 and src_2 and write the result to + * dst. It is allowed for dst to be src_2. + * If run_container_is_full(src_1) is true, you must not be calling this + *function. 
+ **/ +void run_bitset_container_union(const run_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); + +/* Compute the union of src_1 and src_2 and write the result to + * dst. It is allowed for dst to be src_2. This version does not + * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). + * If run_container_is_full(src_1) is true, you must not be calling this + * function. + * */ +void run_bitset_container_lazy_union(const run_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif + +#endif /* INCLUDE_CONTAINERS_MIXED_UNION_H_ */ +/* end file include/roaring/containers/mixed_union.h */ +/* begin file include/roaring/containers/mixed_xor.h */ +/* + * mixed_xor.h + * + */ + +#ifndef INCLUDE_CONTAINERS_MIXED_XOR_H_ +#define INCLUDE_CONTAINERS_MIXED_XOR_H_ + +/* These functions appear to exclude cases where the + * inputs have the same type and the output is guaranteed + * to have the same type as the inputs. Eg, bitset unions + */ + +/* + * Java implementation (as of May 2016) for array_run, run_run + * and bitset_run don't do anything different for inplace. + * (They are not truly in place.) + */ + + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +/* Compute the xor of src_1 and src_2 and write the result to + * dst (which has no container initially). + * Result is true iff dst is a bitset */ +bool array_bitset_container_xor( +const array_container_t *src_1, const bitset_container_t *src_2, +container_t **dst); + +/* Compute the xor of src_1 and src_2 and write the result to + * dst. It is allowed for src_2 to be dst. This version does not + * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). 
+ */ + +void array_bitset_container_lazy_xor(const array_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); +/* Compute the xor of src_1 and src_2 and write the result to + * dst (which has no container initially). Return value is + * "dst is a bitset" + */ + +bool bitset_bitset_container_xor( +const bitset_container_t *src_1, const bitset_container_t *src_2, +container_t **dst); + +/* Compute the xor of src_1 and src_2 and write the result to + * dst. Result may be either a bitset or an array container + * (returns "result is bitset"). dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. + */ + +bool run_bitset_container_xor( +const run_container_t *src_1, const bitset_container_t *src_2, +container_t **dst); + +/* lazy xor. Dst is initialized and may be equal to src_2. + * Result is left as a bitset container, even if actual + * cardinality would dictate an array container. + */ + +void run_bitset_container_lazy_xor(const run_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst); + +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. + */ + +int array_run_container_xor( +const array_container_t *src_1, const run_container_t *src_2, +container_t **dst); + +/* dst does not initially have a valid container. Creates either + * an array or a bitset container, indicated by return code + */ + +bool array_array_container_xor( +const array_container_t *src_1, const array_container_t *src_2, +container_t **dst); + +/* dst does not initially have a valid container. Creates either + * an array or a bitset container, indicated by return code. 
+ * A bitset container will not have a valid cardinality and the + * container type might not be correct for the actual cardinality + */ + +bool array_array_container_lazy_xor( +const array_container_t *src_1, const array_container_t *src_2, +container_t **dst); + +/* Dst is a valid run container. (Can it be src_2? Let's say not.) + * Leaves result as run container, even if other options are + * smaller. + */ + +void array_run_container_lazy_xor(const array_container_t *src_1, +const run_container_t *src_2, +run_container_t *dst); + +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. + */ + +int run_run_container_xor( +const run_container_t *src_1, const run_container_t *src_2, +container_t **dst); + +/* INPLACE versions (initial implementation may not exploit all inplace + * opportunities (if any...) + */ + +/* Compute the xor of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. + * Returns true iff dst is a bitset */ + +bool bitset_array_container_ixor( +bitset_container_t *src_1, const array_container_t *src_2, +container_t **dst); + +bool bitset_bitset_container_ixor( +bitset_container_t *src_1, const bitset_container_t *src_2, +container_t **dst); + +bool array_bitset_container_ixor( +array_container_t *src_1, const bitset_container_t *src_2, +container_t **dst); + +/* Compute the xor of src_1 and src_2 and write the result to + * dst. Result may be either a bitset or an array container + * (returns "result is bitset"). dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. 
+ */ + +bool run_bitset_container_ixor( +run_container_t *src_1, const bitset_container_t *src_2, +container_t **dst); + +bool bitset_run_container_ixor( +bitset_container_t *src_1, const run_container_t *src_2, +container_t **dst); + +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. + */ + +int array_run_container_ixor( +array_container_t *src_1, const run_container_t *src_2, +container_t **dst); + +int run_array_container_ixor( +run_container_t *src_1, const array_container_t *src_2, +container_t **dst); + +bool array_array_container_ixor( +array_container_t *src_1, const array_container_t *src_2, +container_t **dst); + +int run_run_container_ixor( +run_container_t *src_1, const run_container_t *src_2, +container_t **dst); + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif + +#endif +/* end file include/roaring/containers/mixed_xor.h */ +/* begin file include/roaring/containers/containers.h */ +#ifndef CONTAINERS_CONTAINERS_H +#define CONTAINERS_CONTAINERS_H + +#include +#include +#include + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +// would enum be possible or better? + +/** + * The switch case statements follow + * BITSET_CONTAINER_TYPE -- ARRAY_CONTAINER_TYPE -- RUN_CONTAINER_TYPE + * so it makes more sense to number them 1, 2, 3 (in the vague hope that the + * compiler might exploit this ordering). + */ + +#define BITSET_CONTAINER_TYPE 1 +#define ARRAY_CONTAINER_TYPE 2 +#define RUN_CONTAINER_TYPE 3 +#define SHARED_CONTAINER_TYPE 4 + +/** + * Macros for pairing container type codes, suitable for switch statements. + * Use PAIR_CONTAINER_TYPES() for the switch, CONTAINER_PAIR() for the cases: + * + * switch (PAIR_CONTAINER_TYPES(type1, type2)) { + * case CONTAINER_PAIR(BITSET,ARRAY): + * ... 
+ * } + */ +#define PAIR_CONTAINER_TYPES(type1,type2) \ + (4 * (type1) + (type2)) + +#define CONTAINER_PAIR(name1,name2) \ + (4 * (name1##_CONTAINER_TYPE) + (name2##_CONTAINER_TYPE)) + +/** + * A shared container is a wrapper around a container + * with reference counting. + */ +STRUCT_CONTAINER(shared_container_s) { +container_t *container; +uint8_t typecode; +croaring_refcount_t counter; // to be managed atomically +}; + +typedef struct shared_container_s shared_container_t; + +#define CAST_shared(c) CAST(shared_container_t *, c) // safer downcast +#define const_CAST_shared(c) CAST(const shared_container_t *, c) +#define movable_CAST_shared(c) movable_CAST(shared_container_t **, c) + +/* + * With copy_on_write = true + * Create a new shared container if the typecode is not SHARED_CONTAINER_TYPE, + * otherwise, increase the count + * If copy_on_write = false, then clone. + * Return NULL in case of failure. + **/ +container_t *get_copy_of_container(container_t *container, uint8_t *typecode, +bool copy_on_write); + +/* Frees a shared container (actually decrement its counter and only frees when + * the counter falls to zero). 
*/ +void shared_container_free(shared_container_t *container); + +/* extract a copy from the shared container, freeing the shared container if +there is just one instance left, +clone instances when the counter is higher than one +*/ +container_t *shared_container_extract_copy(shared_container_t *container, +uint8_t *typecode); + +/* access to container underneath */ +static inline const container_t *container_unwrap_shared( +const container_t *candidate_shared_container, uint8_t *type +){ +if (*type == SHARED_CONTAINER_TYPE) { +*type = const_CAST_shared(candidate_shared_container)->typecode; +assert(*type != SHARED_CONTAINER_TYPE); +return const_CAST_shared(candidate_shared_container)->container; +} else { +return candidate_shared_container; +} +} + + +/* access to container underneath */ +static inline container_t *container_mutable_unwrap_shared( +container_t *c, uint8_t *type +) { +if (*type == SHARED_CONTAINER_TYPE) { // the passed in container is shared +*type = CAST_shared(c)->typecode; +assert(*type != SHARED_CONTAINER_TYPE); +return CAST_shared(c)->container; // return the enclosed container +} else { +return c; // wasn't shared, so return as-is +} +} + +/* access to container underneath and queries its type */ +static inline uint8_t get_container_type( +const container_t *c, uint8_t type +){ +if (type == SHARED_CONTAINER_TYPE) { +return const_CAST_shared(c)->typecode; +} else { +return type; +} +} + +/** + * Copies a container, requires a typecode. This allocates new memory, caller + * is responsible for deallocation. If the container is not shared, then it is + * physically cloned. Sharable containers are not cloneable. 
+ */ +container_t *container_clone(const container_t *container, uint8_t typecode); + +/* access to container underneath, cloning it if needed */ +static inline container_t *get_writable_copy_if_shared( +container_t *c, uint8_t *type +){ +if (*type == SHARED_CONTAINER_TYPE) { // shared, return enclosed container +return shared_container_extract_copy(CAST_shared(c), type); +} else { +return c; // not shared, so return as-is +} +} + +/** + * End of shared container code + */ + +static const char *container_names[] = {"bitset", "array", "run", "shared"}; +static const char *shared_container_names[] = { +"bitset (shared)", "array (shared)", "run (shared)"}; + +// no matter what the initial container was, convert it to a bitset +// if a new container is produced, caller responsible for freeing the previous +// one +// container should not be a shared container +static inline bitset_container_t *container_to_bitset( +container_t *c, uint8_t typecode +){ +bitset_container_t *result = NULL; +switch (typecode) { +case BITSET_CONTAINER_TYPE: +return CAST_bitset(c); // nothing to do +case ARRAY_CONTAINER_TYPE: +result = bitset_container_from_array(CAST_array(c)); +return result; +case RUN_CONTAINER_TYPE: +result = bitset_container_from_run(CAST_run(c)); +return result; +case SHARED_CONTAINER_TYPE: +assert(false); +roaring_unreachable; +} +assert(false); +roaring_unreachable; +return 0; // unreached +} + +/** + * Get the container name from the typecode + * (unused at time of writing) + */ +/*static inline const char *get_container_name(uint8_t typecode) { + switch (typecode) { + case BITSET_CONTAINER_TYPE: + return container_names[0]; + case ARRAY_CONTAINER_TYPE: + return container_names[1]; + case RUN_CONTAINER_TYPE: + return container_names[2]; + case SHARED_CONTAINER_TYPE: + return container_names[3]; + default: + assert(false); + roaring_unreachable; + return "unknown"; + } +}*/ + +static inline const char *get_full_container_name( +const container_t *c, uint8_t typecode 
+){ +switch (typecode) { +case BITSET_CONTAINER_TYPE: +return container_names[0]; +case ARRAY_CONTAINER_TYPE: +return container_names[1]; +case RUN_CONTAINER_TYPE: +return container_names[2]; +case SHARED_CONTAINER_TYPE: +switch (const_CAST_shared(c)->typecode) { +case BITSET_CONTAINER_TYPE: +return shared_container_names[0]; +case ARRAY_CONTAINER_TYPE: +return shared_container_names[1]; +case RUN_CONTAINER_TYPE: +return shared_container_names[2]; +default: +assert(false); +roaring_unreachable; +return "unknown"; +} +break; +default: +assert(false); +roaring_unreachable; +return "unknown"; +} +roaring_unreachable; +return NULL; +} + +/** + * Get the container cardinality (number of elements), requires a typecode + */ +static inline int container_get_cardinality( +const container_t *c, uint8_t typecode +){ +c = container_unwrap_shared(c, &typecode); +switch (typecode) { +case BITSET_CONTAINER_TYPE: +return bitset_container_cardinality(const_CAST_bitset(c)); +case ARRAY_CONTAINER_TYPE: +return array_container_cardinality(const_CAST_array(c)); +case RUN_CONTAINER_TYPE: +return run_container_cardinality(const_CAST_run(c)); +} +assert(false); +roaring_unreachable; +return 0; // unreached +} + + + +// returns true if a container is known to be full. 
Note that a lazy bitset +// container +// might be full without us knowing +static inline bool container_is_full(const container_t *c, uint8_t typecode) { +c = container_unwrap_shared(c, &typecode); +switch (typecode) { +case BITSET_CONTAINER_TYPE: +return bitset_container_cardinality( +const_CAST_bitset(c)) == (1 << 16); +case ARRAY_CONTAINER_TYPE: +return array_container_cardinality( +const_CAST_array(c)) == (1 << 16); +case RUN_CONTAINER_TYPE: +return run_container_is_full(const_CAST_run(c)); +} +assert(false); +roaring_unreachable; +return 0; // unreached +} + +static inline int container_shrink_to_fit( +container_t *c, uint8_t type +){ +c = container_mutable_unwrap_shared(c, &type); +switch (type) { +case BITSET_CONTAINER_TYPE: +return 0; // no shrinking possible +case ARRAY_CONTAINER_TYPE: +return array_container_shrink_to_fit(CAST_array(c)); +case RUN_CONTAINER_TYPE: +return run_container_shrink_to_fit(CAST_run(c)); +} +assert(false); +roaring_unreachable; +return 0; // unreached +} + + +/** + * make a container with a run of ones + */ +/* initially always use a run container, even if an array might be + * marginally + * smaller */ +static inline container_t *container_range_of_ones( +uint32_t range_start, uint32_t range_end, +uint8_t *result_type +){ +assert(range_end >= range_start); +uint64_t cardinality = range_end - range_start + 1; +if(cardinality <= 2) { +*result_type = ARRAY_CONTAINER_TYPE; +return array_container_create_range(range_start, range_end); +} else { +*result_type = RUN_CONTAINER_TYPE; +return run_container_create_range(range_start, range_end); +} +} + + +/* Create a container with all the values between in [min,max) at a + distance k*step from min. 
*/ +static inline container_t *container_from_range( +uint8_t *type, uint32_t min, +uint32_t max, uint16_t step +){ +if (step == 0) return NULL; // being paranoid +if (step == 1) { +return container_range_of_ones(min,max,type); +// Note: the result is not always a run (need to check the cardinality) +//*type = RUN_CONTAINER_TYPE; +//return run_container_create_range(min, max); +} +int size = (max - min + step - 1) / step; +if (size <= DEFAULT_MAX_SIZE) { // array container +*type = ARRAY_CONTAINER_TYPE; +array_container_t *array = array_container_create_given_capacity(size); +array_container_add_from_range(array, min, max, step); +assert(array->cardinality == size); +return array; +} else { // bitset container +*type = BITSET_CONTAINER_TYPE; +bitset_container_t *bitset = bitset_container_create(); +bitset_container_add_from_range(bitset, min, max, step); +assert(bitset->cardinality == size); +return bitset; +} +} + +/** + * "repair" the container after lazy operations. + */ +static inline container_t *container_repair_after_lazy( +container_t *c, uint8_t *type +){ +c = get_writable_copy_if_shared(c, type); // !!! unnecessary cloning +container_t *result = NULL; +switch (*type) { +case BITSET_CONTAINER_TYPE: { +bitset_container_t *bc = CAST_bitset(c); +bc->cardinality = bitset_container_compute_cardinality(bc); +if (bc->cardinality <= DEFAULT_MAX_SIZE) { +result = array_container_from_bitset(bc); +bitset_container_free(bc); +*type = ARRAY_CONTAINER_TYPE; +return result; +} +return c; } +case ARRAY_CONTAINER_TYPE: +return c; // nothing to do +case RUN_CONTAINER_TYPE: +return convert_run_to_efficient_container_and_free( +CAST_run(c), type); +case SHARED_CONTAINER_TYPE: +assert(false); +} +assert(false); +roaring_unreachable; +return 0; // unreached +} + +/** + * Writes the underlying array to buf, outputs how many bytes were written. + * This is meant to be byte-by-byte compatible with the Java and Go versions of + * Roaring. 
+ * The number of bytes written should be + * container_write(container, buf). + * + */ +static inline int32_t container_write( +const container_t *c, uint8_t typecode, +char *buf +){ +c = container_unwrap_shared(c, &typecode); +switch (typecode) { +case BITSET_CONTAINER_TYPE: +return bitset_container_write(const_CAST_bitset(c), buf); +case ARRAY_CONTAINER_TYPE: +return array_container_write(const_CAST_array(c), buf); +case RUN_CONTAINER_TYPE: +return run_container_write(const_CAST_run(c), buf); +} +assert(false); +roaring_unreachable; +return 0; // unreached +} + +/** + * Get the container size in bytes under portable serialization (see + * container_write), requires a + * typecode + */ +static inline int32_t container_size_in_bytes( +const container_t *c, uint8_t typecode +){ +c = container_unwrap_shared(c, &typecode); +switch (typecode) { +case BITSET_CONTAINER_TYPE: +return bitset_container_size_in_bytes(const_CAST_bitset(c)); +case ARRAY_CONTAINER_TYPE: +return array_container_size_in_bytes(const_CAST_array(c)); +case RUN_CONTAINER_TYPE: +return run_container_size_in_bytes(const_CAST_run(c)); +} +assert(false); +roaring_unreachable; +return 0; // unreached +} + +/** + * print the container (useful for debugging), requires a typecode + */ +void container_printf(const container_t *container, uint8_t typecode); + +/** + * print the content of the container as a comma-separated list of 32-bit values + * starting at base, requires a typecode + */ +void container_printf_as_uint32_array(const container_t *container, +uint8_t typecode, uint32_t base); + +bool container_internal_validate(const container_t *container, +uint8_t typecode, const char **reason); + +/** + * Checks whether a container is not empty, requires a typecode + */ +static inline bool container_nonzero_cardinality( +const container_t *c, uint8_t typecode +){ +c = container_unwrap_shared(c, &typecode); +switch (typecode) { +case BITSET_CONTAINER_TYPE: +return bitset_container_const_nonzero_cardinality( 
+const_CAST_bitset(c)); +case ARRAY_CONTAINER_TYPE: +return array_container_nonzero_cardinality(const_CAST_array(c)); +case RUN_CONTAINER_TYPE: +return run_container_nonzero_cardinality(const_CAST_run(c)); +} +assert(false); +roaring_unreachable; +return 0; // unreached +} + +/** + * Recover memory from a container, requires a typecode + */ +void container_free(container_t *container, uint8_t typecode); + +/** + * Convert a container to an array of values, requires a typecode as well as a + * "base" (most significant values) + * Returns number of ints added. + */ +static inline int container_to_uint32_array( +uint32_t *output, +const container_t *c, uint8_t typecode, +uint32_t base +){ +c = container_unwrap_shared(c, &typecode); +switch (typecode) { +case BITSET_CONTAINER_TYPE: +return bitset_container_to_uint32_array( +output, const_CAST_bitset(c), base); +case ARRAY_CONTAINER_TYPE: +return array_container_to_uint32_array( +output, const_CAST_array(c), base); +case RUN_CONTAINER_TYPE: +return run_container_to_uint32_array( +output, const_CAST_run(c), base); +} +assert(false); +roaring_unreachable; +return 0; // unreached +} + +/** + * Add a value to a container, requires a typecode, fills in new_typecode and + * return (possibly different) container. + * This function may allocate a new container, and caller is responsible for + * memory deallocation + */ +static inline container_t *container_add( +container_t *c, uint16_t val, +uint8_t typecode, // !!! should be second argument? 
+uint8_t *new_typecode +){ +c = get_writable_copy_if_shared(c, &typecode); +switch (typecode) { +case BITSET_CONTAINER_TYPE: +bitset_container_set(CAST_bitset(c), val); +*new_typecode = BITSET_CONTAINER_TYPE; +return c; +case ARRAY_CONTAINER_TYPE: { +array_container_t *ac = CAST_array(c); +if (array_container_try_add(ac, val, DEFAULT_MAX_SIZE) != -1) { +*new_typecode = ARRAY_CONTAINER_TYPE; +return ac; +} else { +bitset_container_t* bitset = bitset_container_from_array(ac); +bitset_container_add(bitset, val); +*new_typecode = BITSET_CONTAINER_TYPE; +return bitset; +} +} break; +case RUN_CONTAINER_TYPE: +// per Java, no container type adjustments are done (revisit?) +run_container_add(CAST_run(c), val); +*new_typecode = RUN_CONTAINER_TYPE; +return c; +default: +assert(false); +roaring_unreachable; +return NULL; +} +} + +/** + * Remove a value from a container, requires a typecode, fills in new_typecode + * and + * return (possibly different) container. + * This function may allocate a new container, and caller is responsible for + * memory deallocation + */ +static inline container_t *container_remove( +container_t *c, uint16_t val, +uint8_t typecode, // !!! should be second argument? +uint8_t *new_typecode +){ +c = get_writable_copy_if_shared(c, &typecode); +switch (typecode) { +case BITSET_CONTAINER_TYPE: +if (bitset_container_remove(CAST_bitset(c), val)) { +int card = bitset_container_cardinality(CAST_bitset(c)); +if (card <= DEFAULT_MAX_SIZE) { +*new_typecode = ARRAY_CONTAINER_TYPE; +return array_container_from_bitset(CAST_bitset(c)); +} +} +*new_typecode = typecode; +return c; +case ARRAY_CONTAINER_TYPE: +*new_typecode = typecode; +array_container_remove(CAST_array(c), val); +return c; +case RUN_CONTAINER_TYPE: +// per Java, no container type adjustments are done (revisit?) 
+run_container_remove(CAST_run(c), val); +*new_typecode = RUN_CONTAINER_TYPE; +return c; +default: +assert(false); +roaring_unreachable; +return NULL; +} +} + +/** + * Check whether a value is in a container, requires a typecode + */ +static inline bool container_contains( +const container_t *c, +uint16_t val, +uint8_t typecode // !!! should be second argument? +){ +c = container_unwrap_shared(c, &typecode); +switch (typecode) { +case BITSET_CONTAINER_TYPE: +return bitset_container_get(const_CAST_bitset(c), val); +case ARRAY_CONTAINER_TYPE: +return array_container_contains(const_CAST_array(c), val); +case RUN_CONTAINER_TYPE: +return run_container_contains(const_CAST_run(c), val); +default: +assert(false); +roaring_unreachable; +return false; +} +} + +/** + * Check whether a range of values from range_start (included) to range_end (excluded) + * is in a container, requires a typecode + */ +static inline bool container_contains_range( +const container_t *c, +uint32_t range_start, uint32_t range_end, +uint8_t typecode // !!! should be second argument? +){ +c = container_unwrap_shared(c, &typecode); +switch (typecode) { +case BITSET_CONTAINER_TYPE: +return bitset_container_get_range(const_CAST_bitset(c), +range_start, range_end); +case ARRAY_CONTAINER_TYPE: +return array_container_contains_range(const_CAST_array(c), +range_start, range_end); +case RUN_CONTAINER_TYPE: +return run_container_contains_range(const_CAST_run(c), +range_start, range_end); +default: +assert(false); +roaring_unreachable; +return false; +} +} + +/** + * Returns true if the two containers have the same content. Note that + * two containers having different types can be "equal" in this sense. 
+ */ +static inline bool container_equals( +const container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2 +){ +c1 = container_unwrap_shared(c1, &type1); +c2 = container_unwrap_shared(c2, &type2); +switch (PAIR_CONTAINER_TYPES(type1, type2)) { +case CONTAINER_PAIR(BITSET,BITSET): +return bitset_container_equals(const_CAST_bitset(c1), +const_CAST_bitset(c2)); + +case CONTAINER_PAIR(BITSET,RUN): +return run_container_equals_bitset(const_CAST_run(c2), +const_CAST_bitset(c1)); + +case CONTAINER_PAIR(RUN,BITSET): +return run_container_equals_bitset(const_CAST_run(c1), +const_CAST_bitset(c2)); + +case CONTAINER_PAIR(BITSET,ARRAY): +// java would always return false? +return array_container_equal_bitset(const_CAST_array(c2), +const_CAST_bitset(c1)); + +case CONTAINER_PAIR(ARRAY,BITSET): +// java would always return false? +return array_container_equal_bitset(const_CAST_array(c1), +const_CAST_bitset(c2)); + +case CONTAINER_PAIR(ARRAY,RUN): +return run_container_equals_array(const_CAST_run(c2), +const_CAST_array(c1)); + +case CONTAINER_PAIR(RUN,ARRAY): +return run_container_equals_array(const_CAST_run(c1), +const_CAST_array(c2)); + +case CONTAINER_PAIR(ARRAY,ARRAY): +return array_container_equals(const_CAST_array(c1), +const_CAST_array(c2)); + +case CONTAINER_PAIR(RUN,RUN): +return run_container_equals(const_CAST_run(c1), +const_CAST_run(c2)); + +default: +assert(false); +roaring_unreachable; +return false; +} +} + +/** + * Returns true if the container c1 is a subset of the container c2. Note that + * c1 can be a subset of c2 even if they have a different type. 
+ */ +static inline bool container_is_subset( +const container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2 +){ +c1 = container_unwrap_shared(c1, &type1); +c2 = container_unwrap_shared(c2, &type2); +switch (PAIR_CONTAINER_TYPES(type1, type2)) { +case CONTAINER_PAIR(BITSET,BITSET): +return bitset_container_is_subset(const_CAST_bitset(c1), +const_CAST_bitset(c2)); + +case CONTAINER_PAIR(BITSET,RUN): +return bitset_container_is_subset_run(const_CAST_bitset(c1), +const_CAST_run(c2)); + +case CONTAINER_PAIR(RUN,BITSET): +return run_container_is_subset_bitset(const_CAST_run(c1), +const_CAST_bitset(c2)); + +case CONTAINER_PAIR(BITSET,ARRAY): +return false; // by construction, size(c1) > size(c2) + +case CONTAINER_PAIR(ARRAY,BITSET): +return array_container_is_subset_bitset(const_CAST_array(c1), +const_CAST_bitset(c2)); + +case CONTAINER_PAIR(ARRAY,RUN): +return array_container_is_subset_run(const_CAST_array(c1), +const_CAST_run(c2)); + +case CONTAINER_PAIR(RUN,ARRAY): +return run_container_is_subset_array(const_CAST_run(c1), +const_CAST_array(c2)); + +case CONTAINER_PAIR(ARRAY,ARRAY): +return array_container_is_subset(const_CAST_array(c1), +const_CAST_array(c2)); + +case CONTAINER_PAIR(RUN,RUN): +return run_container_is_subset(const_CAST_run(c1), +const_CAST_run(c2)); + +default: +assert(false); +roaring_unreachable; +return false; +} +} + +// macro-izations possibilities for generic non-inplace binary-op dispatch + +/** + * Compute intersection between two containers, generate a new container (having + * type result_type), requires a typecode. This allocates new memory, caller + * is responsible for deallocation. 
+ */ +static inline container_t *container_and( +const container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2, +uint8_t *result_type +){ +c1 = container_unwrap_shared(c1, &type1); +c2 = container_unwrap_shared(c2, &type2); +container_t *result = NULL; +switch (PAIR_CONTAINER_TYPES(type1, type2)) { +case CONTAINER_PAIR(BITSET,BITSET): +*result_type = bitset_bitset_container_intersection( +const_CAST_bitset(c1), +const_CAST_bitset(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,ARRAY): +result = array_container_create(); +array_container_intersection(const_CAST_array(c1), +const_CAST_array(c2), +CAST_array(result)); +*result_type = ARRAY_CONTAINER_TYPE; // never bitset +return result; + +case CONTAINER_PAIR(RUN,RUN): +result = run_container_create(); +run_container_intersection(const_CAST_run(c1), +const_CAST_run(c2), +CAST_run(result)); +return convert_run_to_efficient_container_and_free( +CAST_run(result), result_type); + +case CONTAINER_PAIR(BITSET,ARRAY): +result = array_container_create(); +array_bitset_container_intersection(const_CAST_array(c2), +const_CAST_bitset(c1), +CAST_array(result)); +*result_type = ARRAY_CONTAINER_TYPE; // never bitset +return result; + +case CONTAINER_PAIR(ARRAY,BITSET): +result = array_container_create(); +*result_type = ARRAY_CONTAINER_TYPE; // never bitset +array_bitset_container_intersection(const_CAST_array(c1), +const_CAST_bitset(c2), +CAST_array(result)); +return result; + +case CONTAINER_PAIR(BITSET,RUN): +*result_type = run_bitset_container_intersection( +const_CAST_run(c2), +const_CAST_bitset(c1), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(RUN,BITSET): +*result_type = run_bitset_container_intersection( +const_CAST_run(c1), +const_CAST_bitset(c2), &result) +? 
BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,RUN): +result = array_container_create(); +*result_type = ARRAY_CONTAINER_TYPE; // never bitset +array_run_container_intersection(const_CAST_array(c1), +const_CAST_run(c2), +CAST_array(result)); +return result; + +case CONTAINER_PAIR(RUN,ARRAY): +result = array_container_create(); +*result_type = ARRAY_CONTAINER_TYPE; // never bitset +array_run_container_intersection(const_CAST_array(c2), +const_CAST_run(c1), +CAST_array(result)); +return result; + +default: +assert(false); +roaring_unreachable; +return NULL; +} +} + +/** + * Compute the size of the intersection between two containers. + */ +static inline int container_and_cardinality( +const container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2 +){ +c1 = container_unwrap_shared(c1, &type1); +c2 = container_unwrap_shared(c2, &type2); +switch (PAIR_CONTAINER_TYPES(type1, type2)) { +case CONTAINER_PAIR(BITSET,BITSET): +return bitset_container_and_justcard( +const_CAST_bitset(c1), const_CAST_bitset(c2)); + +case CONTAINER_PAIR(ARRAY,ARRAY): +return array_container_intersection_cardinality( +const_CAST_array(c1), const_CAST_array(c2)); + +case CONTAINER_PAIR(RUN,RUN): +return run_container_intersection_cardinality( +const_CAST_run(c1), const_CAST_run(c2)); + +case CONTAINER_PAIR(BITSET,ARRAY): +return array_bitset_container_intersection_cardinality( +const_CAST_array(c2), const_CAST_bitset(c1)); + +case CONTAINER_PAIR(ARRAY,BITSET): +return array_bitset_container_intersection_cardinality( +const_CAST_array(c1), const_CAST_bitset(c2)); + +case CONTAINER_PAIR(BITSET,RUN): +return run_bitset_container_intersection_cardinality( +const_CAST_run(c2), const_CAST_bitset(c1)); + +case CONTAINER_PAIR(RUN,BITSET): +return run_bitset_container_intersection_cardinality( +const_CAST_run(c1), const_CAST_bitset(c2)); + +case CONTAINER_PAIR(ARRAY,RUN): +return array_run_container_intersection_cardinality( +const_CAST_array(c1), 
const_CAST_run(c2)); + +case CONTAINER_PAIR(RUN,ARRAY): +return array_run_container_intersection_cardinality( +const_CAST_array(c2), const_CAST_run(c1)); + +default: +assert(false); +roaring_unreachable; +return 0; +} +} + +/** + * Check whether two containers intersect. + */ +static inline bool container_intersect( +const container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2 +){ +c1 = container_unwrap_shared(c1, &type1); +c2 = container_unwrap_shared(c2, &type2); +switch (PAIR_CONTAINER_TYPES(type1, type2)) { +case CONTAINER_PAIR(BITSET,BITSET): +return bitset_container_intersect(const_CAST_bitset(c1), +const_CAST_bitset(c2)); + +case CONTAINER_PAIR(ARRAY,ARRAY): +return array_container_intersect(const_CAST_array(c1), +const_CAST_array(c2)); + +case CONTAINER_PAIR(RUN,RUN): +return run_container_intersect(const_CAST_run(c1), +const_CAST_run(c2)); + +case CONTAINER_PAIR(BITSET,ARRAY): +return array_bitset_container_intersect(const_CAST_array(c2), +const_CAST_bitset(c1)); + +case CONTAINER_PAIR(ARRAY,BITSET): +return array_bitset_container_intersect(const_CAST_array(c1), +const_CAST_bitset(c2)); + +case CONTAINER_PAIR(BITSET,RUN): +return run_bitset_container_intersect(const_CAST_run(c2), +const_CAST_bitset(c1)); + +case CONTAINER_PAIR(RUN,BITSET): +return run_bitset_container_intersect(const_CAST_run(c1), +const_CAST_bitset(c2)); + +case CONTAINER_PAIR(ARRAY,RUN): +return array_run_container_intersect(const_CAST_array(c1), +const_CAST_run(c2)); + +case CONTAINER_PAIR(RUN,ARRAY): +return array_run_container_intersect(const_CAST_array(c2), +const_CAST_run(c1)); + +default: +assert(false); +roaring_unreachable; +return 0; +} +} + +/** + * Compute intersection between two containers, with result in the first + container if possible. If the returned pointer is identical to c1, + then the container has been modified. If the returned pointer is different + from c1, then a new container has been created and the caller is responsible + for freeing it. 
+ The type of the first container may change. Returns the modified + (and possibly new) container. +*/ +static inline container_t *container_iand( +container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2, +uint8_t *result_type +){ +c1 = get_writable_copy_if_shared(c1, &type1); +c2 = container_unwrap_shared(c2, &type2); +container_t *result = NULL; +switch (PAIR_CONTAINER_TYPES(type1, type2)) { +case CONTAINER_PAIR(BITSET,BITSET): +*result_type = +bitset_bitset_container_intersection_inplace( +CAST_bitset(c1), const_CAST_bitset(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,ARRAY): +array_container_intersection_inplace(CAST_array(c1), +const_CAST_array(c2)); +*result_type = ARRAY_CONTAINER_TYPE; +return c1; + +case CONTAINER_PAIR(RUN,RUN): +result = run_container_create(); +run_container_intersection(const_CAST_run(c1), +const_CAST_run(c2), +CAST_run(result)); +// as of January 2016, Java code used non-in-place intersection for +// two runcontainers +return convert_run_to_efficient_container_and_free( +CAST_run(result), result_type); + +case CONTAINER_PAIR(BITSET,ARRAY): +// c1 is a bitmap so no inplace possible +result = array_container_create(); +array_bitset_container_intersection(const_CAST_array(c2), +const_CAST_bitset(c1), +CAST_array(result)); +*result_type = ARRAY_CONTAINER_TYPE; // never bitset +return result; + +case CONTAINER_PAIR(ARRAY,BITSET): +*result_type = ARRAY_CONTAINER_TYPE; // never bitset +array_bitset_container_intersection( +const_CAST_array(c1), const_CAST_bitset(c2), +CAST_array(c1)); // result is allowed to be same as c1 +return c1; + +case CONTAINER_PAIR(BITSET,RUN): +// will attempt in-place computation +*result_type = run_bitset_container_intersection( +const_CAST_run(c2), +const_CAST_bitset(c1), &c1) +? 
BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return c1; + +case CONTAINER_PAIR(RUN,BITSET): +*result_type = run_bitset_container_intersection( +const_CAST_run(c1), +const_CAST_bitset(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,RUN): +result = array_container_create(); +*result_type = ARRAY_CONTAINER_TYPE; // never bitset +array_run_container_intersection(const_CAST_array(c1), +const_CAST_run(c2), +CAST_array(result)); +return result; + +case CONTAINER_PAIR(RUN,ARRAY): +result = array_container_create(); +*result_type = ARRAY_CONTAINER_TYPE; // never bitset +array_run_container_intersection(const_CAST_array(c2), +const_CAST_run(c1), +CAST_array(result)); +return result; + +default: +assert(false); +roaring_unreachable; +return NULL; +} +} + +/** + * Compute union between two containers, generate a new container (having type + * result_type), requires a typecode. This allocates new memory, caller + * is responsible for deallocation. + */ +static inline container_t *container_or( +const container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2, +uint8_t *result_type +){ +c1 = container_unwrap_shared(c1, &type1); +c2 = container_unwrap_shared(c2, &type2); +container_t *result = NULL; +switch (PAIR_CONTAINER_TYPES(type1, type2)) { +case CONTAINER_PAIR(BITSET,BITSET): +result = bitset_container_create(); +bitset_container_or(const_CAST_bitset(c1), +const_CAST_bitset(c2), +CAST_bitset(result)); +*result_type = BITSET_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,ARRAY): +*result_type = array_array_container_union( +const_CAST_array(c1), +const_CAST_array(c2), &result) +? 
BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(RUN,RUN): +result = run_container_create(); +run_container_union(const_CAST_run(c1), +const_CAST_run(c2), +CAST_run(result)); +*result_type = RUN_CONTAINER_TYPE; +// todo: could be optimized since will never convert to array +result = convert_run_to_efficient_container_and_free( +CAST_run(result), result_type); +return result; + +case CONTAINER_PAIR(BITSET,ARRAY): +result = bitset_container_create(); +array_bitset_container_union(const_CAST_array(c2), +const_CAST_bitset(c1), +CAST_bitset(result)); +*result_type = BITSET_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,BITSET): +result = bitset_container_create(); +array_bitset_container_union(const_CAST_array(c1), +const_CAST_bitset(c2), +CAST_bitset(result)); +*result_type = BITSET_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(BITSET,RUN): +if (run_container_is_full(const_CAST_run(c2))) { +result = run_container_create(); +*result_type = RUN_CONTAINER_TYPE; +run_container_copy(const_CAST_run(c2), +CAST_run(result)); +return result; +} +result = bitset_container_create(); +run_bitset_container_union(const_CAST_run(c2), +const_CAST_bitset(c1), +CAST_bitset(result)); +*result_type = BITSET_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(RUN,BITSET): +if (run_container_is_full(const_CAST_run(c1))) { +result = run_container_create(); +*result_type = RUN_CONTAINER_TYPE; +run_container_copy(const_CAST_run(c1), +CAST_run(result)); +return result; +} +result = bitset_container_create(); +run_bitset_container_union(const_CAST_run(c1), +const_CAST_bitset(c2), +CAST_bitset(result)); +*result_type = BITSET_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,RUN): +result = run_container_create(); +array_run_container_union(const_CAST_array(c1), +const_CAST_run(c2), +CAST_run(result)); +result = convert_run_to_efficient_container_and_free( +CAST_run(result), result_type); +return result; + +case 
CONTAINER_PAIR(RUN,ARRAY): +result = run_container_create(); +array_run_container_union(const_CAST_array(c2), +const_CAST_run(c1), +CAST_run(result)); +result = convert_run_to_efficient_container_and_free( +CAST_run(result), result_type); +return result; + +default: +assert(false); +roaring_unreachable; +return NULL; // unreached +} +} + +/** + * Compute union between two containers, generate a new container (having type + * result_type), requires a typecode. This allocates new memory, caller + * is responsible for deallocation. + * + * This lazy version delays some operations such as the maintenance of the + * cardinality. It requires repair later on the generated containers. + */ +static inline container_t *container_lazy_or( +const container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2, +uint8_t *result_type +){ +c1 = container_unwrap_shared(c1, &type1); +c2 = container_unwrap_shared(c2, &type2); +container_t *result = NULL; +switch (PAIR_CONTAINER_TYPES(type1, type2)) { +case CONTAINER_PAIR(BITSET,BITSET): +result = bitset_container_create(); +bitset_container_or_nocard( +const_CAST_bitset(c1), const_CAST_bitset(c2), +CAST_bitset(result)); // is lazy +*result_type = BITSET_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,ARRAY): +*result_type = array_array_container_lazy_union( +const_CAST_array(c1), +const_CAST_array(c2), &result) +? 
BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(RUN,RUN): +result = run_container_create(); +run_container_union(const_CAST_run(c1), +const_CAST_run(c2), +CAST_run(result)); +*result_type = RUN_CONTAINER_TYPE; +// we are being lazy +result = convert_run_to_efficient_container_and_free( +CAST_run(result), result_type); +return result; + +case CONTAINER_PAIR(BITSET,ARRAY): +result = bitset_container_create(); +array_bitset_container_lazy_union( +const_CAST_array(c2), const_CAST_bitset(c1), +CAST_bitset(result)); // is lazy +*result_type = BITSET_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,BITSET): +result = bitset_container_create(); +array_bitset_container_lazy_union( +const_CAST_array(c1), const_CAST_bitset(c2), +CAST_bitset(result)); // is lazy +*result_type = BITSET_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(BITSET,RUN): +if (run_container_is_full(const_CAST_run(c2))) { +result = run_container_create(); +*result_type = RUN_CONTAINER_TYPE; +run_container_copy(const_CAST_run(c2), CAST_run(result)); +return result; +} +result = bitset_container_create(); +run_bitset_container_lazy_union( +const_CAST_run(c2), const_CAST_bitset(c1), +CAST_bitset(result)); // is lazy +*result_type = BITSET_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(RUN,BITSET): +if (run_container_is_full(const_CAST_run(c1))) { +result = run_container_create(); +*result_type = RUN_CONTAINER_TYPE; +run_container_copy(const_CAST_run(c1), CAST_run(result)); +return result; +} +result = bitset_container_create(); +run_bitset_container_lazy_union( +const_CAST_run(c1), const_CAST_bitset(c2), +CAST_bitset(result)); // is lazy +*result_type = BITSET_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,RUN): +result = run_container_create(); +array_run_container_union(const_CAST_array(c1), +const_CAST_run(c2), +CAST_run(result)); +*result_type = RUN_CONTAINER_TYPE; +// next line skipped since we are lazy +// result = 
convert_run_to_efficient_container(result, result_type); +return result; + +case CONTAINER_PAIR(RUN,ARRAY): +result = run_container_create(); +array_run_container_union( +const_CAST_array(c2), const_CAST_run(c1), +CAST_run(result)); // TODO make lazy +*result_type = RUN_CONTAINER_TYPE; +// next line skipped since we are lazy +// result = convert_run_to_efficient_container(result, result_type); +return result; + +default: +assert(false); +roaring_unreachable; +return NULL; // unreached +} +} + +/** + * Compute the union between two containers, with result in the first container. + * If the returned pointer is identical to c1, then the container has been + * modified. + * If the returned pointer is different from c1, then a new container has been + * created and the caller is responsible for freeing it. + * The type of the first container may change. Returns the modified + * (and possibly new) container +*/ +static inline container_t *container_ior( +container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2, +uint8_t *result_type +){ +c1 = get_writable_copy_if_shared(c1, &type1); +c2 = container_unwrap_shared(c2, &type2); +container_t *result = NULL; +switch (PAIR_CONTAINER_TYPES(type1, type2)) { +case CONTAINER_PAIR(BITSET,BITSET): +bitset_container_or(const_CAST_bitset(c1), +const_CAST_bitset(c2), +CAST_bitset(c1)); +#ifdef OR_BITSET_CONVERSION_TO_FULL +if (CAST_bitset(c1)->cardinality == (1 << 16)) { // we convert +result = run_container_create_range(0, (1 << 16)); +*result_type = RUN_CONTAINER_TYPE; +return result; +} +#endif +*result_type = BITSET_CONTAINER_TYPE; +return c1; + +case CONTAINER_PAIR(ARRAY,ARRAY): +*result_type = array_array_container_inplace_union( +CAST_array(c1), const_CAST_array(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +if((result == NULL) +&& (*result_type == ARRAY_CONTAINER_TYPE)) { +return c1; // the computation was done in-place! 
+} +return result; + +case CONTAINER_PAIR(RUN,RUN): +run_container_union_inplace(CAST_run(c1), const_CAST_run(c2)); +return convert_run_to_efficient_container(CAST_run(c1), +result_type); + +case CONTAINER_PAIR(BITSET,ARRAY): +array_bitset_container_union(const_CAST_array(c2), +const_CAST_bitset(c1), +CAST_bitset(c1)); +*result_type = BITSET_CONTAINER_TYPE; // never array +return c1; + +case CONTAINER_PAIR(ARRAY,BITSET): +// c1 is an array, so no in-place possible +result = bitset_container_create(); +*result_type = BITSET_CONTAINER_TYPE; +array_bitset_container_union(const_CAST_array(c1), +const_CAST_bitset(c2), +CAST_bitset(result)); +return result; + +case CONTAINER_PAIR(BITSET,RUN): +if (run_container_is_full(const_CAST_run(c2))) { +result = run_container_create(); +*result_type = RUN_CONTAINER_TYPE; +run_container_copy(const_CAST_run(c2), CAST_run(result)); +return result; +} +run_bitset_container_union(const_CAST_run(c2), +const_CAST_bitset(c1), +CAST_bitset(c1)); // allowed +*result_type = BITSET_CONTAINER_TYPE; +return c1; + +case CONTAINER_PAIR(RUN,BITSET): +if (run_container_is_full(const_CAST_run(c1))) { +*result_type = RUN_CONTAINER_TYPE; +return c1; +} +result = bitset_container_create(); +run_bitset_container_union(const_CAST_run(c1), +const_CAST_bitset(c2), +CAST_bitset(result)); +*result_type = BITSET_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,RUN): +result = run_container_create(); +array_run_container_union(const_CAST_array(c1), +const_CAST_run(c2), +CAST_run(result)); +result = convert_run_to_efficient_container_and_free( +CAST_run(result), result_type); +return result; + +case CONTAINER_PAIR(RUN,ARRAY): +array_run_container_inplace_union(const_CAST_array(c2), +CAST_run(c1)); +c1 = convert_run_to_efficient_container(CAST_run(c1), +result_type); +return c1; + +default: +assert(false); +roaring_unreachable; +return NULL; +} +} + +/** + * Compute the union between two containers, with result in the first container. 
+ * If the returned pointer is identical to c1, then the container has been + * modified. + * If the returned pointer is different from c1, then a new container has been + * created and the caller is responsible for freeing it. + * The type of the first container may change. Returns the modified + * (and possibly new) container + * + * This lazy version delays some operations such as the maintenance of the + * cardinality. It requires repair later on the generated containers. +*/ +static inline container_t *container_lazy_ior( +container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2, +uint8_t *result_type +){ +assert(type1 != SHARED_CONTAINER_TYPE); +// c1 = get_writable_copy_if_shared(c1,&type1); +c2 = container_unwrap_shared(c2, &type2); +container_t *result = NULL; +switch (PAIR_CONTAINER_TYPES(type1, type2)) { +case CONTAINER_PAIR(BITSET,BITSET): +#ifdef LAZY_OR_BITSET_CONVERSION_TO_FULL +// if we have two bitsets, we might as well compute the cardinality +bitset_container_or(const_CAST_bitset(c1), +const_CAST_bitset(c2), +CAST_bitset(c1)); +// it is possible that two bitsets can lead to a full container +if (CAST_bitset(c1)->cardinality == (1 << 16)) { // we convert +result = run_container_create_range(0, (1 << 16)); +*result_type = RUN_CONTAINER_TYPE; +return result; +} +#else +bitset_container_or_nocard(const_CAST_bitset(c1), +const_CAST_bitset(c2), +CAST_bitset(c1)); + +#endif +*result_type = BITSET_CONTAINER_TYPE; +return c1; + +case CONTAINER_PAIR(ARRAY,ARRAY): +*result_type = array_array_container_lazy_inplace_union( +CAST_array(c1), +const_CAST_array(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +if((result == NULL) +&& (*result_type == ARRAY_CONTAINER_TYPE)) { +return c1; // the computation was done in-place! 
+} +return result; + +case CONTAINER_PAIR(RUN,RUN): +run_container_union_inplace(CAST_run(c1), +const_CAST_run(c2)); +*result_type = RUN_CONTAINER_TYPE; +return convert_run_to_efficient_container(CAST_run(c1), +result_type); + +case CONTAINER_PAIR(BITSET,ARRAY): +array_bitset_container_lazy_union( +const_CAST_array(c2), const_CAST_bitset(c1), +CAST_bitset(c1)); // is lazy +*result_type = BITSET_CONTAINER_TYPE; // never array +return c1; + +case CONTAINER_PAIR(ARRAY,BITSET): +// c1 is an array, so no in-place possible +result = bitset_container_create(); +*result_type = BITSET_CONTAINER_TYPE; +array_bitset_container_lazy_union( +const_CAST_array(c1), const_CAST_bitset(c2), +CAST_bitset(result)); // is lazy +return result; + +case CONTAINER_PAIR(BITSET,RUN): +if (run_container_is_full(const_CAST_run(c2))) { +result = run_container_create(); +*result_type = RUN_CONTAINER_TYPE; +run_container_copy(const_CAST_run(c2), +CAST_run(result)); +return result; +} +run_bitset_container_lazy_union( +const_CAST_run(c2), const_CAST_bitset(c1), +CAST_bitset(c1)); // allowed // lazy +*result_type = BITSET_CONTAINER_TYPE; +return c1; + +case CONTAINER_PAIR(RUN,BITSET): +if (run_container_is_full(const_CAST_run(c1))) { +*result_type = RUN_CONTAINER_TYPE; +return c1; +} +result = bitset_container_create(); +run_bitset_container_lazy_union( +const_CAST_run(c1), const_CAST_bitset(c2), +CAST_bitset(result)); // lazy +*result_type = BITSET_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,RUN): +result = run_container_create(); +array_run_container_union(const_CAST_array(c1), +const_CAST_run(c2), +CAST_run(result)); +*result_type = RUN_CONTAINER_TYPE; +// next line skipped since we are lazy +// result = convert_run_to_efficient_container_and_free(result, +// result_type); +return result; + +case CONTAINER_PAIR(RUN,ARRAY): +array_run_container_inplace_union(const_CAST_array(c2), +CAST_run(c1)); +*result_type = RUN_CONTAINER_TYPE; +// next line skipped since we are lazy +// result 
= convert_run_to_efficient_container_and_free(result, +// result_type); +return c1; + +default: +assert(false); +roaring_unreachable; +return NULL; +} +} + +/** + * Compute symmetric difference (xor) between two containers, generate a new + * container (having type result_type), requires a typecode. This allocates new + * memory, caller is responsible for deallocation. + */ +static inline container_t* container_xor( +const container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2, +uint8_t *result_type +){ +c1 = container_unwrap_shared(c1, &type1); +c2 = container_unwrap_shared(c2, &type2); +container_t *result = NULL; +switch (PAIR_CONTAINER_TYPES(type1, type2)) { +case CONTAINER_PAIR(BITSET,BITSET): +*result_type = bitset_bitset_container_xor( +const_CAST_bitset(c1), +const_CAST_bitset(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,ARRAY): +*result_type = array_array_container_xor( +const_CAST_array(c1), +const_CAST_array(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(RUN,RUN): +*result_type = +run_run_container_xor(const_CAST_run(c1), +const_CAST_run(c2), &result); +return result; + +case CONTAINER_PAIR(BITSET,ARRAY): +*result_type = array_bitset_container_xor( +const_CAST_array(c2), +const_CAST_bitset(c1), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,BITSET): +*result_type = array_bitset_container_xor( +const_CAST_array(c1), +const_CAST_bitset(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(BITSET,RUN): +*result_type = run_bitset_container_xor( +const_CAST_run(c2), +const_CAST_bitset(c1), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(RUN,BITSET): +*result_type = run_bitset_container_xor( +const_CAST_run(c1), +const_CAST_bitset(c2), &result) +? 
BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,RUN): +*result_type = +array_run_container_xor(const_CAST_array(c1), +const_CAST_run(c2), &result); +return result; + +case CONTAINER_PAIR(RUN,ARRAY): +*result_type = +array_run_container_xor(const_CAST_array(c2), +const_CAST_run(c1), &result); +return result; + +default: +assert(false); +roaring_unreachable; +return NULL; // unreached +} +} + +/* Applies an offset to the non-empty container 'c'. + * The results are stored in new containers returned via 'lo' and 'hi', for the + * low and high halves of the result (where the low half matches the original key + * and the high one corresponds to values for the following key). + * Either one of 'lo' and 'hi' are allowed to be 'NULL', but not both. + * Whenever one of them is not 'NULL', it should point to a 'NULL' container. + * Whenever one of them is 'NULL' the shifted elements for that part will not be + * computed. + * If either of the resulting containers turns out to be empty, the pointed + * container will remain 'NULL'. + */ +static inline void container_add_offset(const container_t *c, uint8_t type, +container_t **lo, container_t **hi, +uint16_t offset) { +assert(offset != 0); +assert(container_nonzero_cardinality(c, type)); +assert(lo != NULL || hi != NULL); +assert(lo == NULL || *lo == NULL); +assert(hi == NULL || *hi == NULL); + +switch (type) { +case BITSET_CONTAINER_TYPE: +bitset_container_offset(const_CAST_bitset(c), lo, hi, offset); +break; +case ARRAY_CONTAINER_TYPE: +array_container_offset(const_CAST_array(c), lo, hi, offset); +break; +case RUN_CONTAINER_TYPE: +run_container_offset(const_CAST_run(c), lo, hi, offset); +break; +default: +assert(false); +roaring_unreachable; +break; +} +} + +/** + * Compute xor between two containers, generate a new container (having type + * result_type), requires a typecode. This allocates new memory, caller + * is responsible for deallocation. 
+ * + * This lazy version delays some operations such as the maintenance of the + * cardinality. It requires repair later on the generated containers. + */ +static inline container_t *container_lazy_xor( +const container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2, +uint8_t *result_type +){ +c1 = container_unwrap_shared(c1, &type1); +c2 = container_unwrap_shared(c2, &type2); +container_t *result = NULL; +switch (PAIR_CONTAINER_TYPES(type1, type2)) { +case CONTAINER_PAIR(BITSET,BITSET): +result = bitset_container_create(); +bitset_container_xor_nocard( +const_CAST_bitset(c1), const_CAST_bitset(c2), +CAST_bitset(result)); // is lazy +*result_type = BITSET_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,ARRAY): +*result_type = array_array_container_lazy_xor( +const_CAST_array(c1), +const_CAST_array(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(RUN,RUN): +// nothing special done yet. +*result_type = +run_run_container_xor(const_CAST_run(c1), +const_CAST_run(c2), &result); +return result; + +case CONTAINER_PAIR(BITSET,ARRAY): +result = bitset_container_create(); +*result_type = BITSET_CONTAINER_TYPE; +array_bitset_container_lazy_xor(const_CAST_array(c2), +const_CAST_bitset(c1), +CAST_bitset(result)); +return result; + +case CONTAINER_PAIR(ARRAY,BITSET): +result = bitset_container_create(); +*result_type = BITSET_CONTAINER_TYPE; +array_bitset_container_lazy_xor(const_CAST_array(c1), +const_CAST_bitset(c2), +CAST_bitset(result)); +return result; + +case CONTAINER_PAIR(BITSET,RUN): +result = bitset_container_create(); +run_bitset_container_lazy_xor(const_CAST_run(c2), +const_CAST_bitset(c1), +CAST_bitset(result)); +*result_type = BITSET_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(RUN,BITSET): +result = bitset_container_create(); +run_bitset_container_lazy_xor(const_CAST_run(c1), +const_CAST_bitset(c2), +CAST_bitset(result)); +*result_type = BITSET_CONTAINER_TYPE; +return 
result; + +case CONTAINER_PAIR(ARRAY,RUN): +result = run_container_create(); +array_run_container_lazy_xor(const_CAST_array(c1), +const_CAST_run(c2), +CAST_run(result)); +*result_type = RUN_CONTAINER_TYPE; +// next line skipped since we are lazy +// result = convert_run_to_efficient_container(result, result_type); +return result; + +case CONTAINER_PAIR(RUN,ARRAY): +result = run_container_create(); +array_run_container_lazy_xor(const_CAST_array(c2), +const_CAST_run(c1), +CAST_run(result)); +*result_type = RUN_CONTAINER_TYPE; +// next line skipped since we are lazy +// result = convert_run_to_efficient_container(result, result_type); +return result; + +default: +assert(false); +roaring_unreachable; +return NULL; // unreached +} +} + +/** + * Compute the xor between two containers, with result in the first container. + * If the returned pointer is identical to c1, then the container has been + * modified. + * If the returned pointer is different from c1, then a new container has been + * created and the caller is responsible for freeing it. + * The type of the first container may change. Returns the modified + * (and possibly new) container +*/ +static inline container_t *container_ixor( +container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2, +uint8_t *result_type +){ +c1 = get_writable_copy_if_shared(c1, &type1); +c2 = container_unwrap_shared(c2, &type2); +container_t *result = NULL; +switch (PAIR_CONTAINER_TYPES(type1, type2)) { +case CONTAINER_PAIR(BITSET,BITSET): +*result_type = bitset_bitset_container_ixor( +CAST_bitset(c1), const_CAST_bitset(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,ARRAY): +*result_type = array_array_container_ixor( +CAST_array(c1), const_CAST_array(c2), &result) +? 
BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(RUN,RUN): +*result_type = run_run_container_ixor( +CAST_run(c1), const_CAST_run(c2), &result); +return result; + +case CONTAINER_PAIR(BITSET,ARRAY): +*result_type = bitset_array_container_ixor( +CAST_bitset(c1), const_CAST_array(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,BITSET): +*result_type = array_bitset_container_ixor( +CAST_array(c1), const_CAST_bitset(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(BITSET,RUN): +*result_type = +bitset_run_container_ixor( +CAST_bitset(c1), const_CAST_run(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; + +return result; + +case CONTAINER_PAIR(RUN,BITSET): +*result_type = run_bitset_container_ixor( +CAST_run(c1), const_CAST_bitset(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,RUN): +*result_type = array_run_container_ixor( +CAST_array(c1), const_CAST_run(c2), &result); +return result; + +case CONTAINER_PAIR(RUN,ARRAY): +*result_type = run_array_container_ixor( +CAST_run(c1), const_CAST_array(c2), &result); +return result; + +default: +assert(false); +roaring_unreachable; +return NULL; +} +} + +/** + * Compute the xor between two containers, with result in the first container. + * If the returned pointer is identical to c1, then the container has been + * modified. + * If the returned pointer is different from c1, then a new container has been + * created and the caller is responsible for freeing it. + * The type of the first container may change. Returns the modified + * (and possibly new) container + * + * This lazy version delays some operations such as the maintenance of the + * cardinality. It requires repair later on the generated containers. 
+*/ +static inline container_t *container_lazy_ixor( +container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2, +uint8_t *result_type +){ +assert(type1 != SHARED_CONTAINER_TYPE); +// c1 = get_writable_copy_if_shared(c1,&type1); +c2 = container_unwrap_shared(c2, &type2); +switch (PAIR_CONTAINER_TYPES(type1, type2)) { +case CONTAINER_PAIR(BITSET,BITSET): +bitset_container_xor_nocard(CAST_bitset(c1), +const_CAST_bitset(c2), +CAST_bitset(c1)); // is lazy +*result_type = BITSET_CONTAINER_TYPE; +return c1; + +// TODO: other cases being lazy, esp. when we know inplace not likely +// could see the corresponding code for union +default: +// we may have a dirty bitset (without a precomputed cardinality) +// and calling container_ixor on it might be unsafe. +if (type1 == BITSET_CONTAINER_TYPE) { +bitset_container_t *bc = CAST_bitset(c1); +if (bc->cardinality == BITSET_UNKNOWN_CARDINALITY) { +bc->cardinality = bitset_container_compute_cardinality(bc); +} +} +return container_ixor(c1, type1, c2, type2, result_type); +} +} + +/** + * Compute difference (andnot) between two containers, generate a new + * container (having type result_type), requires a typecode. This allocates new + * memory, caller is responsible for deallocation. + */ +static inline container_t *container_andnot( +const container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2, +uint8_t *result_type +){ +c1 = container_unwrap_shared(c1, &type1); +c2 = container_unwrap_shared(c2, &type2); +container_t *result = NULL; +switch (PAIR_CONTAINER_TYPES(type1, type2)) { +case CONTAINER_PAIR(BITSET,BITSET): +*result_type = bitset_bitset_container_andnot( +const_CAST_bitset(c1), +const_CAST_bitset(c2), &result) +? 
BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,ARRAY): +result = array_container_create(); +array_array_container_andnot(const_CAST_array(c1), +const_CAST_array(c2), +CAST_array(result)); +*result_type = ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(RUN,RUN): +if (run_container_is_full(const_CAST_run(c2))) { +result = array_container_create(); +*result_type = ARRAY_CONTAINER_TYPE; +return result; +} +*result_type = +run_run_container_andnot(const_CAST_run(c1), +const_CAST_run(c2), &result); +return result; + +case CONTAINER_PAIR(BITSET,ARRAY): +*result_type = bitset_array_container_andnot( +const_CAST_bitset(c1), +const_CAST_array(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,BITSET): +result = array_container_create(); +array_bitset_container_andnot(const_CAST_array(c1), +const_CAST_bitset(c2), +CAST_array(result)); +*result_type = ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(BITSET,RUN): +if (run_container_is_full(const_CAST_run(c2))) { +result = array_container_create(); +*result_type = ARRAY_CONTAINER_TYPE; +return result; +} +*result_type = bitset_run_container_andnot( +const_CAST_bitset(c1), +const_CAST_run(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(RUN,BITSET): +*result_type = run_bitset_container_andnot( +const_CAST_run(c1), +const_CAST_bitset(c2), &result) +? 
BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,RUN): +if (run_container_is_full(const_CAST_run(c2))) { +result = array_container_create(); +*result_type = ARRAY_CONTAINER_TYPE; +return result; +} +result = array_container_create(); +array_run_container_andnot(const_CAST_array(c1), +const_CAST_run(c2), +CAST_array(result)); +*result_type = ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(RUN,ARRAY): +*result_type = run_array_container_andnot( +const_CAST_run(c1), const_CAST_array(c2), +&result); +return result; + +default: +assert(false); +roaring_unreachable; +return NULL; // unreached +} +} + +/** + * Compute the andnot between two containers, with result in the first + * container. + * If the returned pointer is identical to c1, then the container has been + * modified. + * If the returned pointer is different from c1, then a new container has been + * created and the caller is responsible for freeing it. + * The type of the first container may change. Returns the modified + * (and possibly new) container +*/ +static inline container_t *container_iandnot( +container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2, +uint8_t *result_type +){ +c1 = get_writable_copy_if_shared(c1, &type1); +c2 = container_unwrap_shared(c2, &type2); +container_t *result = NULL; +switch (PAIR_CONTAINER_TYPES(type1, type2)) { +case CONTAINER_PAIR(BITSET,BITSET): +*result_type = bitset_bitset_container_iandnot( +CAST_bitset(c1), +const_CAST_bitset(c2), &result) +? 
BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,ARRAY): +array_array_container_iandnot(CAST_array(c1), +const_CAST_array(c2)); +*result_type = ARRAY_CONTAINER_TYPE; +return c1; + +case CONTAINER_PAIR(RUN,RUN): +*result_type = run_run_container_iandnot( +CAST_run(c1), const_CAST_run(c2), &result); +return result; + +case CONTAINER_PAIR(BITSET,ARRAY): +*result_type = bitset_array_container_iandnot( +CAST_bitset(c1), +const_CAST_array(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,BITSET): +*result_type = ARRAY_CONTAINER_TYPE; +array_bitset_container_iandnot(CAST_array(c1), +const_CAST_bitset(c2)); +return c1; + +case CONTAINER_PAIR(BITSET,RUN): +*result_type = bitset_run_container_iandnot( +CAST_bitset(c1), +const_CAST_run(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(RUN,BITSET): +*result_type = run_bitset_container_iandnot( +CAST_run(c1), +const_CAST_bitset(c2), &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; + +case CONTAINER_PAIR(ARRAY,RUN): +*result_type = ARRAY_CONTAINER_TYPE; +array_run_container_iandnot(CAST_array(c1), +const_CAST_run(c2)); +return c1; + +case CONTAINER_PAIR(RUN,ARRAY): +*result_type = run_array_container_iandnot( +CAST_run(c1), const_CAST_array(c2), &result); +return result; + +default: +assert(false); +roaring_unreachable; +return NULL; +} +} + +/** + * Visit all values x of the container once, passing (base+x,ptr) + * to iterator. You need to specify a container and its type. + * Returns true if the iteration should continue. 
+ */ +static inline bool container_iterate( +const container_t *c, uint8_t type, +uint32_t base, +roaring_iterator iterator, void *ptr +){ +c = container_unwrap_shared(c, &type); +switch (type) { +case BITSET_CONTAINER_TYPE: +return bitset_container_iterate(const_CAST_bitset(c), +base, iterator, ptr); +case ARRAY_CONTAINER_TYPE: +return array_container_iterate(const_CAST_array(c), +base, iterator, ptr); +case RUN_CONTAINER_TYPE: +return run_container_iterate(const_CAST_run(c), +base, iterator, ptr); +default: +assert(false); +roaring_unreachable; +} +assert(false); +roaring_unreachable; +return false; +} + +static inline bool container_iterate64( +const container_t *c, uint8_t type, +uint32_t base, +roaring_iterator64 iterator, +uint64_t high_bits, void *ptr +){ +c = container_unwrap_shared(c, &type); +switch (type) { +case BITSET_CONTAINER_TYPE: +return bitset_container_iterate64(const_CAST_bitset(c), base, +iterator, high_bits, ptr); +case ARRAY_CONTAINER_TYPE: +return array_container_iterate64(const_CAST_array(c), base, +iterator, high_bits, ptr); +case RUN_CONTAINER_TYPE: +return run_container_iterate64(const_CAST_run(c), base, +iterator, high_bits, ptr); +default: +assert(false); +roaring_unreachable; +} +assert(false); +roaring_unreachable; +return false; +} + +static inline container_t *container_not( +const container_t *c, uint8_t type, +uint8_t *result_type +){ +c = container_unwrap_shared(c, &type); +container_t *result = NULL; +switch (type) { +case BITSET_CONTAINER_TYPE: +*result_type = bitset_container_negation( +const_CAST_bitset(c), &result) +? 
BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; +case ARRAY_CONTAINER_TYPE: +result = bitset_container_create(); +*result_type = BITSET_CONTAINER_TYPE; +array_container_negation(const_CAST_array(c), +CAST_bitset(result)); +return result; +case RUN_CONTAINER_TYPE: +*result_type = +run_container_negation(const_CAST_run(c), &result); +return result; + +default: +assert(false); +roaring_unreachable; +} +assert(false); +roaring_unreachable; +return NULL; +} + +static inline container_t *container_not_range( +const container_t *c, uint8_t type, +uint32_t range_start, uint32_t range_end, +uint8_t *result_type +){ +c = container_unwrap_shared(c, &type); +container_t *result = NULL; +switch (type) { +case BITSET_CONTAINER_TYPE: +*result_type = +bitset_container_negation_range( +const_CAST_bitset(c), range_start, range_end, &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; +case ARRAY_CONTAINER_TYPE: +*result_type = +array_container_negation_range( +const_CAST_array(c), range_start, range_end, &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; +case RUN_CONTAINER_TYPE: +*result_type = run_container_negation_range( +const_CAST_run(c), range_start, range_end, &result); +return result; + +default: +assert(false); +roaring_unreachable; +} +assert(false); +roaring_unreachable; +return NULL; +} + +static inline container_t *container_inot( +container_t *c, uint8_t type, +uint8_t *result_type +){ +c = get_writable_copy_if_shared(c, &type); +container_t *result = NULL; +switch (type) { +case BITSET_CONTAINER_TYPE: +*result_type = bitset_container_negation_inplace( +CAST_bitset(c), &result) +? 
BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; +case ARRAY_CONTAINER_TYPE: +// will never be inplace +result = bitset_container_create(); +*result_type = BITSET_CONTAINER_TYPE; +array_container_negation(CAST_array(c), +CAST_bitset(result)); +array_container_free(CAST_array(c)); +return result; +case RUN_CONTAINER_TYPE: +*result_type = +run_container_negation_inplace(CAST_run(c), &result); +return result; + +default: +assert(false); +roaring_unreachable; +} +assert(false); +roaring_unreachable; +return NULL; +} + +static inline container_t *container_inot_range( +container_t *c, uint8_t type, +uint32_t range_start, uint32_t range_end, +uint8_t *result_type +){ +c = get_writable_copy_if_shared(c, &type); +container_t *result = NULL; +switch (type) { +case BITSET_CONTAINER_TYPE: +*result_type = +bitset_container_negation_range_inplace( +CAST_bitset(c), range_start, range_end, &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; +case ARRAY_CONTAINER_TYPE: +*result_type = +array_container_negation_range_inplace( +CAST_array(c), range_start, range_end, &result) +? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; +return result; +case RUN_CONTAINER_TYPE: +*result_type = run_container_negation_range_inplace( +CAST_run(c), range_start, range_end, &result); +return result; + +default: +assert(false); +roaring_unreachable; +} +assert(false); +roaring_unreachable; +return NULL; +} + +/** + * If the element of given rank is in this container, supposing that + * the first + * element has rank start_rank, then the function returns true and + * sets element + * accordingly. + * Otherwise, it returns false and update start_rank. 
+ */ +static inline bool container_select( +const container_t *c, uint8_t type, +uint32_t *start_rank, uint32_t rank, +uint32_t *element +){ +c = container_unwrap_shared(c, &type); +switch (type) { +case BITSET_CONTAINER_TYPE: +return bitset_container_select(const_CAST_bitset(c), +start_rank, rank, element); +case ARRAY_CONTAINER_TYPE: +return array_container_select(const_CAST_array(c), +start_rank, rank, element); +case RUN_CONTAINER_TYPE: +return run_container_select(const_CAST_run(c), +start_rank, rank, element); +default: +assert(false); +roaring_unreachable; +} +assert(false); +roaring_unreachable; +return false; +} + +static inline uint16_t container_maximum( +const container_t *c, uint8_t type +){ +c = container_unwrap_shared(c, &type); +switch (type) { +case BITSET_CONTAINER_TYPE: +return bitset_container_maximum(const_CAST_bitset(c)); +case ARRAY_CONTAINER_TYPE: +return array_container_maximum(const_CAST_array(c)); +case RUN_CONTAINER_TYPE: +return run_container_maximum(const_CAST_run(c)); +default: +assert(false); +roaring_unreachable; +} +assert(false); +roaring_unreachable; +return false; +} + +static inline uint16_t container_minimum( +const container_t *c, uint8_t type +){ +c = container_unwrap_shared(c, &type); +switch (type) { +case BITSET_CONTAINER_TYPE: +return bitset_container_minimum(const_CAST_bitset(c)); +case ARRAY_CONTAINER_TYPE: +return array_container_minimum(const_CAST_array(c)); +case RUN_CONTAINER_TYPE: +return run_container_minimum(const_CAST_run(c)); +default: +assert(false); +roaring_unreachable; +} +assert(false); +roaring_unreachable; +return false; +} + +// number of values smaller or equal to x +static inline int container_rank( +const container_t *c, uint8_t type, +uint16_t x +){ +c = container_unwrap_shared(c, &type); +switch (type) { +case BITSET_CONTAINER_TYPE: +return bitset_container_rank(const_CAST_bitset(c), x); +case ARRAY_CONTAINER_TYPE: +return array_container_rank(const_CAST_array(c), x); +case RUN_CONTAINER_TYPE: 
+return run_container_rank(const_CAST_run(c), x); +default: +assert(false); +roaring_unreachable; +} +assert(false); +roaring_unreachable; +return false; +} + +// return the index of x; if x does not exist return -1 +static inline int container_get_index(const container_t *c, uint8_t type, +uint16_t x) { +c = container_unwrap_shared(c, &type); +switch (type) { +case BITSET_CONTAINER_TYPE: +return bitset_container_get_index(const_CAST_bitset(c), x); +case ARRAY_CONTAINER_TYPE: +return array_container_get_index(const_CAST_array(c), x); +case RUN_CONTAINER_TYPE: +return run_container_get_index(const_CAST_run(c), x); +default: +assert(false); +roaring_unreachable; +} +assert(false); +roaring_unreachable; +return false; +} + +/** + * Add all values in range [min, max] to a given container. + * + * If the returned pointer is different from $container, then a new container + * has been created and the caller is responsible for freeing it. + * The type of the first container may change. Returns the modified + * (and possibly new) container. 
+ */ +static inline container_t *container_add_range( +container_t *c, uint8_t type, +uint32_t min, uint32_t max, +uint8_t *result_type +){ +// NB: when selecting new container type, we perform only inexpensive checks +switch (type) { +case BITSET_CONTAINER_TYPE: { +bitset_container_t *bitset = CAST_bitset(c); + +int32_t union_cardinality = 0; +union_cardinality += bitset->cardinality; +union_cardinality += max - min + 1; +union_cardinality -= bitset_lenrange_cardinality(bitset->words, +min, max-min); + +if (union_cardinality == INT32_C(0x10000)) { +*result_type = RUN_CONTAINER_TYPE; +return run_container_create_range(0, INT32_C(0x10000)); +} else { +*result_type = BITSET_CONTAINER_TYPE; +bitset_set_lenrange(bitset->words, min, max - min); +bitset->cardinality = union_cardinality; +return bitset; +} +} +case ARRAY_CONTAINER_TYPE: { +array_container_t *array = CAST_array(c); + +int32_t nvals_greater = count_greater(array->array, array->cardinality, max); +int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min); +int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater; + +if (union_cardinality == INT32_C(0x10000)) { +*result_type = RUN_CONTAINER_TYPE; +return run_container_create_range(0, INT32_C(0x10000)); +} else if (union_cardinality <= DEFAULT_MAX_SIZE) { +*result_type = ARRAY_CONTAINER_TYPE; +array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater); +return array; +} else { +*result_type = BITSET_CONTAINER_TYPE; +bitset_container_t *bitset = bitset_container_from_array(array); +bitset_set_lenrange(bitset->words, min, max - min); +bitset->cardinality = union_cardinality; +return bitset; +} +} +case RUN_CONTAINER_TYPE: { +run_container_t *run = CAST_run(c); + +int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max); +int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min); + +int32_t run_size_bytes = (nruns_less + 1 + nruns_greater) * sizeof(rle16_t); 
+int32_t bitset_size_bytes = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + +if (run_size_bytes <= bitset_size_bytes) { +run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater); +*result_type = RUN_CONTAINER_TYPE; +return run; +} else { +return container_from_run_range(run, min, max, result_type); +} +} +default: +roaring_unreachable; +} +} + +/* + * Removes all elements in range [min, max]. + * Returns one of: + * - NULL if no elements left + * - pointer to the original container + * - pointer to a newly-allocated container (if it is more efficient) + * + * If the returned pointer is different from $container, then a new container + * has been created and the caller is responsible for freeing the original container. + */ +static inline container_t *container_remove_range( +container_t *c, uint8_t type, +uint32_t min, uint32_t max, +uint8_t *result_type +){ +switch (type) { +case BITSET_CONTAINER_TYPE: { +bitset_container_t *bitset = CAST_bitset(c); + +int32_t result_cardinality = bitset->cardinality - +bitset_lenrange_cardinality(bitset->words, min, max-min); + +if (result_cardinality == 0) { +return NULL; +} else if (result_cardinality <= DEFAULT_MAX_SIZE) { +*result_type = ARRAY_CONTAINER_TYPE; +bitset_reset_range(bitset->words, min, max+1); +bitset->cardinality = result_cardinality; +return array_container_from_bitset(bitset); +} else { +*result_type = BITSET_CONTAINER_TYPE; +bitset_reset_range(bitset->words, min, max+1); +bitset->cardinality = result_cardinality; +return bitset; +} +} +case ARRAY_CONTAINER_TYPE: { +array_container_t *array = CAST_array(c); + +int32_t nvals_greater = count_greater(array->array, array->cardinality, max); +int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min); +int32_t result_cardinality = nvals_less + nvals_greater; + +if (result_cardinality == 0) { +return NULL; +} else { +*result_type = ARRAY_CONTAINER_TYPE; +array_container_remove_range(array, nvals_less, 
+array->cardinality - result_cardinality); +return array; +} +} +case RUN_CONTAINER_TYPE: { +run_container_t *run = CAST_run(c); + +if (run->n_runs == 0) { +return NULL; +} +if (min <= run_container_minimum(run) && max >= run_container_maximum(run)) { +return NULL; +} + +run_container_remove_range(run, min, max); +return convert_run_to_efficient_container(run, result_type); +} +default: +roaring_unreachable; +} +} + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif + +#endif +/* end file include/roaring/containers/containers.h */ +/* begin file include/roaring/roaring_array.h */ +#ifndef INCLUDE_ROARING_ARRAY_H +#define INCLUDE_ROARING_ARRAY_H + +#include +#include +#include + + +#ifdef __cplusplus +extern "C" { namespace roaring { + +// Note: in pure C++ code, you should avoid putting `using` in header files +using api::roaring_array_t; + +namespace internal { +#endif + +enum { +SERIAL_COOKIE_NO_RUNCONTAINER = 12346, +SERIAL_COOKIE = 12347, +FROZEN_COOKIE = 13766, +NO_OFFSET_THRESHOLD = 4 +}; + +/** + * Create a new roaring array + */ +roaring_array_t *ra_create(void); + +/** + * Initialize an existing roaring array with the specified capacity (in number + * of containers) + */ +bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap); + +/** + * Initialize with zero capacity + */ +void ra_init(roaring_array_t *t); + +/** + * Copies this roaring array, we assume that dest is not initialized + */ +bool ra_copy(const roaring_array_t *source, roaring_array_t *dest, +bool copy_on_write); + +/* + * Shrinks the capacity, returns the number of bytes saved. 
+ */ +int ra_shrink_to_fit(roaring_array_t *ra); + +/** + * Copies this roaring array, we assume that dest is initialized + */ +bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, +bool copy_on_write); + +/** + * Frees the memory used by a roaring array + */ +void ra_clear(roaring_array_t *r); + +/** + * Frees the memory used by a roaring array, but does not free the containers + */ +void ra_clear_without_containers(roaring_array_t *r); + +/** + * Frees just the containers + */ +void ra_clear_containers(roaring_array_t *ra); + +/** + * Get the index corresponding to a 16-bit key + */ +inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x) { +if ((ra->size == 0) || ra->keys[ra->size - 1] == x) return ra->size - 1; +return binarySearch(ra->keys, (int32_t)ra->size, x); +} + +/** + * Retrieves the container at index i, filling in the typecode + */ +inline container_t *ra_get_container_at_index( +const roaring_array_t *ra, uint16_t i, uint8_t *typecode +){ +*typecode = ra->typecodes[i]; +return ra->containers[i]; +} + +/** + * Retrieves the key at index i + */ +inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) { +return ra->keys[i]; +} + +/** + * Add a new key-value pair at index i + */ +void ra_insert_new_key_value_at( +roaring_array_t *ra, int32_t i, uint16_t key, +container_t *c, uint8_t typecode); + +/** + * Append a new key-value pair + */ +void ra_append( +roaring_array_t *ra, uint16_t key, +container_t *c, uint8_t typecode); + +/** + * Append a new key-value pair to ra, cloning (in COW sense) a value from sa + * at index index + */ +void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, +uint16_t index, bool copy_on_write); + +/** + * Append new key-value pairs to ra, cloning (in COW sense) values from sa + * at indexes + * [start_index, end_index) + */ +void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, +int32_t start_index, int32_t end_index, +bool copy_on_write); + 
+/** appends from sa to ra, ending with the greatest key that + * is less than or equal to stopping_key + */ +void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, +uint16_t stopping_key, bool copy_on_write); + +/** appends from sa to ra, starting with the smallest key that + * is strictly greater than before_start + */ + +void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, +uint16_t before_start, bool copy_on_write); + +/** + * Move the key-value pairs to ra from sa at indexes + * [start_index, end_index), old array should not be freed + * (use ra_clear_without_containers) + **/ +void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, +int32_t start_index, int32_t end_index); +/** + * Append new key-value pairs to ra, from sa at indexes + * [start_index, end_index) + */ +void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, +int32_t start_index, int32_t end_index, +bool copy_on_write); + +/** + * Set the container at the corresponding index using the specified + * typecode. 
+ */ +inline void ra_set_container_at_index( +const roaring_array_t *ra, int32_t i, +container_t *c, uint8_t typecode +){ +assert(i < ra->size); +ra->containers[i] = c; +ra->typecodes[i] = typecode; +} + +container_t *ra_get_container(roaring_array_t *ra, uint16_t x, uint8_t *typecode); + +/** + * If needed, increase the capacity of the array so that it can fit k values + * (at + * least); + */ +bool extend_array(roaring_array_t *ra, int32_t k); + +inline int32_t ra_get_size(const roaring_array_t *ra) { return ra->size; } + +static inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x, +int32_t pos) { +return advanceUntil(ra->keys, pos, ra->size, x); +} + +int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos); + +void ra_downsize(roaring_array_t *ra, int32_t new_length); + +inline void ra_replace_key_and_container_at_index( +roaring_array_t *ra, int32_t i, uint16_t key, +container_t *c, uint8_t typecode +){ +assert(i < ra->size); + +ra->keys[i] = key; +ra->containers[i] = c; +ra->typecodes[i] = typecode; +} + +// write set bits to an array +void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans); + +bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans); + +/** + * write a bitmap to a buffer. This is meant to be compatible with + * the + * Java and Go versions. Return the size in bytes of the serialized + * output (which should be ra_portable_size_in_bytes(ra)). + */ +size_t ra_portable_serialize(const roaring_array_t *ra, char *buf); + +/** + * read a bitmap from a serialized version. This is meant to be compatible + * with the Java and Go versions. + * maxbytes indicates how many bytes available from buf. + * When the function returns true, roaring_array_t is populated with the data + * and *readbytes indicates how many bytes were read. In all cases, if the function + * returns true, then maxbytes >= *readbytes. 
+ */ +bool ra_portable_deserialize(roaring_array_t *ra, const char *buf, const size_t maxbytes, size_t * readbytes); + +/** + * Quickly checks whether there is a serialized bitmap at the pointer, + * not exceeding size "maxbytes" in bytes. This function does not allocate + * memory dynamically. + * + * This function returns 0 if and only if no valid bitmap is found. + * Otherwise, it returns how many bytes are occupied by the bitmap data. + */ +size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes); + +/** + * How many bytes are required to serialize this bitmap (meant to be + * compatible + * with Java and Go versions) + */ +size_t ra_portable_size_in_bytes(const roaring_array_t *ra); + +/** + * return true if it contains at least one run container. + */ +bool ra_has_run_container(const roaring_array_t *ra); + +/** + * Size of the header when serializing (meant to be compatible + * with Java and Go versions) + */ +uint32_t ra_portable_header_size(const roaring_array_t *ra); + +/** + * If the container at the index i is shared, unshare it (creating a local + * copy if needed). + */ +static inline void ra_unshare_container_at_index(roaring_array_t *ra, +uint16_t i) { +assert(i < ra->size); +ra->containers[i] = get_writable_copy_if_shared(ra->containers[i], +&ra->typecodes[i]); +} + +/** + * remove at index i, sliding over all entries after i + */ +void ra_remove_at_index(roaring_array_t *ra, int32_t i); + + +/** +* clears all containers, sets the size at 0 and shrinks the memory usage. +*/ +void ra_reset(roaring_array_t *ra); + +/** + * remove at index i, sliding over all entries after i. Free removed container. 
+ */ +void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i); + +/** + * remove a chunk of indices, sliding over entries after it + */ +// void ra_remove_index_range(roaring_array_t *ra, int32_t begin, int32_t end); + +// used in inplace andNot only, to slide left the containers from +// the mutated RoaringBitmap that are after the largest container of +// the argument RoaringBitmap. It is followed by a call to resize. +// +void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, +uint32_t new_begin); + +/** + * Shifts rightmost $count containers to the left (distance < 0) or + * to the right (distance > 0). + * Allocates memory if necessary. + * This function doesn't free or create new containers. + * Caller is responsible for that. + */ +void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance); + +#ifdef __cplusplus +} // namespace internal +} } // extern "C" { namespace roaring { +#endif + +#endif +/* end file include/roaring/roaring_array.h */ +/* begin file src/array_util.c */ +#include +#include +#include +#include +#include +#include + + +#if CROARING_IS_X64 +#ifndef CROARING_COMPILER_SUPPORTS_AVX512 +#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." 
+#endif // CROARING_COMPILER_SUPPORTS_AVX512 +#endif + +#ifdef __cplusplus +using namespace ::roaring::internal; +extern "C" { namespace roaring { namespace internal { +#endif + +extern inline int32_t binarySearch(const uint16_t *array, int32_t lenarray, +uint16_t ikey); + +#if CROARING_IS_X64 +// used by intersect_vector16 +ALIGNED(0x1000) +static const uint8_t shuffle_mask16[] = { +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, +6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 
0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, +8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, +6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, +8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, +10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, +10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11, +0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, +10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, +10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, +8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, +10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, +10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +4, 5, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, +10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, +6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, +0xFF, 0xFF, 0xFF, 0xFF, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 12, 13, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 4, 5, 12, 13, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 12, 13, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 12, 13, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, +12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, +6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, +12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 12, 13, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +4, 5, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 12, 13, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, +8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 4, 5, 8, 9, 12, 13, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, +8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9, +12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, +8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, +12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +4, 5, 
6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, +10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, 12, 13, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, +12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, +10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 10, 11, +12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, +12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 10, 11, 12, 13, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +4, 5, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 10, 11, +12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, +6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, +0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, +10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 10, 11, +12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, +10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 10, 11, +12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, +6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 10, 11, +12, 13, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, +8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, +0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 10, 11, +12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, +6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, +8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 14, 15, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 14, 15, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 14, 15, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 8, 9, 
14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, +8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +4, 5, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, +6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 10, 11, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 10, 11, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 10, 11, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, +10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 10, 11, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, +6, 7, 10, 11, 14, 
15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, +10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 10, 11, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +4, 5, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 10, 11, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, +8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 10, 11, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, +8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9, +10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, +8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, +10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, +12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, 14, 15, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 12, 13, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, +12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 12, 13, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 12, 13, +14, 15, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 12, 13, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +4, 5, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 12, 13, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, +6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, +12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 12, 13, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, +12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 12, 13, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 12, 13, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, +8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 6, 7, 8, 9, 12, 13, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 12, 13, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, +6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, +8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 10, 11, 12, 13, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 12, 13, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, +4, 5, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 12, 13, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, +10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 12, 13, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, +10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11, +12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, +10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11, +12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, +8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 12, 13, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, +10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 12, 13, +14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, +8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, +2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, +10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 6, 7, 8, 9, +10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11, +12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, +6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, +4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, +10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 2, 3, 4, 5, +6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, +12, 13, 14, 15}; + +/** + * From Schlegel et al., Fast 
Sorted-Set Intersection using SIMD Instructions + * Optimized by D. Lemire on May 3rd 2013 + */ +CROARING_TARGET_AVX2 +int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, +const uint16_t *__restrict__ B, size_t s_b, +uint16_t *C) { +size_t count = 0; +size_t i_a = 0, i_b = 0; +const int vectorlength = sizeof(__m128i) / sizeof(uint16_t); +const size_t st_a = (s_a / vectorlength) * vectorlength; +const size_t st_b = (s_b / vectorlength) * vectorlength; +__m128i v_a, v_b; +if ((i_a < st_a) && (i_b < st_b)) { +v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); +v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); +while ((A[i_a] == 0) || (B[i_b] == 0)) { +const __m128i res_v = _mm_cmpestrm( +v_b, vectorlength, v_a, vectorlength, +_SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); +const int r = _mm_extract_epi32(res_v, 0); +__m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + r); +__m128i p = _mm_shuffle_epi8(v_a, sm16); +_mm_storeu_si128((__m128i *)&C[count], p); // can overflow +count += _mm_popcnt_u32(r); +const uint16_t a_max = A[i_a + vectorlength - 1]; +const uint16_t b_max = B[i_b + vectorlength - 1]; +if (a_max <= b_max) { +i_a += vectorlength; +if (i_a == st_a) break; +v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); +} +if (b_max <= a_max) { +i_b += vectorlength; +if (i_b == st_b) break; +v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); +} +} +if ((i_a < st_a) && (i_b < st_b)) +while (true) { +const __m128i res_v = _mm_cmpistrm( +v_b, v_a, +_SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); +const int r = _mm_extract_epi32(res_v, 0); +__m128i sm16 = +_mm_loadu_si128((const __m128i *)shuffle_mask16 + r); +__m128i p = _mm_shuffle_epi8(v_a, sm16); +_mm_storeu_si128((__m128i *)&C[count], p); // can overflow +count += _mm_popcnt_u32(r); +const uint16_t a_max = A[i_a + vectorlength - 1]; +const uint16_t b_max = B[i_b + vectorlength - 1]; +if (a_max <= b_max) { +i_a += vectorlength; +if (i_a == st_a) break; +v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); +} 
+if (b_max <= a_max) { +i_b += vectorlength; +if (i_b == st_b) break; +v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); +} +} +} +// intersect the tail using scalar intersection +while (i_a < s_a && i_b < s_b) { +uint16_t a = A[i_a]; +uint16_t b = B[i_b]; +if (a < b) { +i_a++; +} else if (b < a) { +i_b++; +} else { +C[count] = a; //==b; +count++; +i_a++; +i_b++; +} +} +return (int32_t)count; +} + +ALLOW_UNALIGNED +int array_container_to_uint32_array_vector16(void *vout, const uint16_t* array, size_t cardinality, +uint32_t base) { +int outpos = 0; +uint32_t *out = (uint32_t *)vout; +size_t i = 0; +for ( ;i + sizeof(__m128i)/sizeof(uint16_t) <= cardinality; i += sizeof(__m128i)/sizeof(uint16_t)) { +__m128i vinput = _mm_loadu_si128((const __m128i*) (array + i)); +__m256i voutput = _mm256_add_epi32(_mm256_cvtepu16_epi32(vinput), _mm256_set1_epi32(base)); +_mm256_storeu_si256((__m256i*)(out + outpos), voutput); +outpos += sizeof(__m256i)/sizeof(uint32_t); +} +for ( ; i < cardinality; ++i) { +const uint32_t val = base + array[i]; +memcpy(out + outpos, &val, +sizeof(uint32_t)); // should be compiled as a MOV on x64 +outpos++; +} +return outpos; +} + +int32_t intersect_vector16_inplace(uint16_t *__restrict__ A, size_t s_a, +const uint16_t *__restrict__ B, size_t s_b) { +size_t count = 0; +size_t i_a = 0, i_b = 0; +const int vectorlength = sizeof(__m128i) / sizeof(uint16_t); +const size_t st_a = (s_a / vectorlength) * vectorlength; +const size_t st_b = (s_b / vectorlength) * vectorlength; +__m128i v_a, v_b; +if ((i_a < st_a) && (i_b < st_b)) { +v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); +v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); +__m128i tmp[2] = {_mm_setzero_si128()}; +size_t tmp_count = 0; +while ((A[i_a] == 0) || (B[i_b] == 0)) { +const __m128i res_v = _mm_cmpestrm( +v_b, vectorlength, v_a, vectorlength, +_SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); +const int r = _mm_extract_epi32(res_v, 0); +__m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + r); 
+__m128i p = _mm_shuffle_epi8(v_a, sm16); +_mm_storeu_si128((__m128i*)&((uint16_t*)tmp)[tmp_count], p); +tmp_count += _mm_popcnt_u32(r); +const uint16_t a_max = A[i_a + vectorlength - 1]; +const uint16_t b_max = B[i_b + vectorlength - 1]; +if (a_max <= b_max) { +_mm_storeu_si128((__m128i *)&A[count], tmp[0]); +_mm_storeu_si128(tmp, _mm_setzero_si128()); +count += tmp_count; +tmp_count = 0; +i_a += vectorlength; +if (i_a == st_a) break; +v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); +} +if (b_max <= a_max) { +i_b += vectorlength; +if (i_b == st_b) break; +v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); +} +} +if ((i_a < st_a) && (i_b < st_b)) { +while (true) { +const __m128i res_v = _mm_cmpistrm( +v_b, v_a, +_SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); +const int r = _mm_extract_epi32(res_v, 0); +__m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + r); +__m128i p = _mm_shuffle_epi8(v_a, sm16); +_mm_storeu_si128((__m128i*)&((uint16_t*)tmp)[tmp_count], p); +tmp_count += _mm_popcnt_u32(r); +const uint16_t a_max = A[i_a + vectorlength - 1]; +const uint16_t b_max = B[i_b + vectorlength - 1]; +if (a_max <= b_max) { +_mm_storeu_si128((__m128i *)&A[count], tmp[0]); +_mm_storeu_si128(tmp, _mm_setzero_si128()); +count += tmp_count; +tmp_count = 0; +i_a += vectorlength; +if (i_a == st_a) break; +v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); +} +if (b_max <= a_max) { +i_b += vectorlength; +if (i_b == st_b) break; +v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); +} +} +} +// tmp_count <= 8, so this does not affect efficiency so much +for (size_t i = 0; i < tmp_count; i++) { +A[count] = ((uint16_t*)tmp)[i]; +count++; +} +i_a += tmp_count; // We can at least jump pass $tmp_count elements in A +} +// intersect the tail using scalar intersection +while (i_a < s_a && i_b < s_b) { +uint16_t a = A[i_a]; +uint16_t b = B[i_b]; +if (a < b) { +i_a++; +} else if (b < a) { +i_b++; +} else { +A[count] = a; //==b; +count++; +i_a++; +i_b++; +} +} +return (int32_t)count; +} 
+CROARING_UNTARGET_AVX2 + +CROARING_TARGET_AVX2 +int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, +size_t s_a, +const uint16_t *__restrict__ B, +size_t s_b) { +size_t count = 0; +size_t i_a = 0, i_b = 0; +const int vectorlength = sizeof(__m128i) / sizeof(uint16_t); +const size_t st_a = (s_a / vectorlength) * vectorlength; +const size_t st_b = (s_b / vectorlength) * vectorlength; +__m128i v_a, v_b; +if ((i_a < st_a) && (i_b < st_b)) { +v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); +v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); +while ((A[i_a] == 0) || (B[i_b] == 0)) { +const __m128i res_v = _mm_cmpestrm( +v_b, vectorlength, v_a, vectorlength, +_SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); +const int r = _mm_extract_epi32(res_v, 0); +count += _mm_popcnt_u32(r); +const uint16_t a_max = A[i_a + vectorlength - 1]; +const uint16_t b_max = B[i_b + vectorlength - 1]; +if (a_max <= b_max) { +i_a += vectorlength; +if (i_a == st_a) break; +v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); +} +if (b_max <= a_max) { +i_b += vectorlength; +if (i_b == st_b) break; +v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); +} +} +if ((i_a < st_a) && (i_b < st_b)) +while (true) { +const __m128i res_v = _mm_cmpistrm( +v_b, v_a, +_SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); +const int r = _mm_extract_epi32(res_v, 0); +count += _mm_popcnt_u32(r); +const uint16_t a_max = A[i_a + vectorlength - 1]; +const uint16_t b_max = B[i_b + vectorlength - 1]; +if (a_max <= b_max) { +i_a += vectorlength; +if (i_a == st_a) break; +v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); +} +if (b_max <= a_max) { +i_b += vectorlength; +if (i_b == st_b) break; +v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); +} +} +} +// intersect the tail using scalar intersection +while (i_a < s_a && i_b < s_b) { +uint16_t a = A[i_a]; +uint16_t b = B[i_b]; +if (a < b) { +i_a++; +} else if (b < a) { +i_b++; +} else { +count++; +i_a++; +i_b++; +} +} +return (int32_t)count; +} +CROARING_UNTARGET_AVX2 + 
+CROARING_TARGET_AVX2 +///////// +// Warning: +// This function may not be safe if A == C or B == C. +///////// +int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, +const uint16_t *__restrict__ B, size_t s_b, +uint16_t *C) { +// we handle the degenerate case +if (s_a == 0) return 0; +if (s_b == 0) { +if (A != C) memcpy(C, A, sizeof(uint16_t) * s_a); +return (int32_t)s_a; +} +// handle the leading zeroes, it is messy but it allows us to use the fast +// _mm_cmpistrm instrinsic safely +int32_t count = 0; +if ((A[0] == 0) || (B[0] == 0)) { +if ((A[0] == 0) && (B[0] == 0)) { +A++; +s_a--; +B++; +s_b--; +} else if (A[0] == 0) { +C[count++] = 0; +A++; +s_a--; +} else { +B++; +s_b--; +} +} +// at this point, we have two non-empty arrays, made of non-zero +// increasing values. +size_t i_a = 0, i_b = 0; +const size_t vectorlength = sizeof(__m128i) / sizeof(uint16_t); +const size_t st_a = (s_a / vectorlength) * vectorlength; +const size_t st_b = (s_b / vectorlength) * vectorlength; +if ((i_a < st_a) && (i_b < st_b)) { // this is the vectorized code path +__m128i v_a, v_b; //, v_bmax; +// we load a vector from A and a vector from B +v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); +v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); +// we have a runningmask which indicates which values from A have been +// spotted in B, these don't get written out. +__m128i runningmask_a_found_in_b = _mm_setzero_si128(); +/**** + * start of the main vectorized loop + *****/ +while (true) { +// afoundinb will contain a mask indicate for each entry in A +// whether it is seen +// in B +const __m128i a_found_in_b = +_mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | +_SIDD_BIT_MASK); +runningmask_a_found_in_b = +_mm_or_si128(runningmask_a_found_in_b, a_found_in_b); +// we always compare the last values of A and B +const uint16_t a_max = A[i_a + vectorlength - 1]; +const uint16_t b_max = B[i_b + vectorlength - 1]; +if (a_max <= b_max) { +// Ok. 
In this code path, we are ready to write our v_a +// because there is no need to read more from B, they will +// all be large values. +const int bitmask_belongs_to_difference = +_mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF; +/*** next few lines are probably expensive *****/ +__m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + +bitmask_belongs_to_difference); +__m128i p = _mm_shuffle_epi8(v_a, sm16); +_mm_storeu_si128((__m128i *)&C[count], p); // can overflow +count += _mm_popcnt_u32(bitmask_belongs_to_difference); +// we advance a +i_a += vectorlength; +if (i_a == st_a) // no more +break; +runningmask_a_found_in_b = _mm_setzero_si128(); +v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); +} +if (b_max <= a_max) { +// in this code path, the current v_b has become useless +i_b += vectorlength; +if (i_b == st_b) break; +v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); +} +} +// at this point, either we have i_a == st_a, which is the end of the +// vectorized processing, +// or we have i_b == st_b, and we are not done processing the vector... +// so we need to finish it off. +if (i_a < st_a) { // we have unfinished business... 
+uint16_t buffer[8]; // buffer to do a masked load +memset(buffer, 0, 8 * sizeof(uint16_t)); +memcpy(buffer, B + i_b, (s_b - i_b) * sizeof(uint16_t)); +v_b = _mm_lddqu_si128((__m128i *)buffer); +const __m128i a_found_in_b = +_mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | +_SIDD_BIT_MASK); +runningmask_a_found_in_b = +_mm_or_si128(runningmask_a_found_in_b, a_found_in_b); +const int bitmask_belongs_to_difference = +_mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF; +__m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + +bitmask_belongs_to_difference); +__m128i p = _mm_shuffle_epi8(v_a, sm16); +_mm_storeu_si128((__m128i *)&C[count], p); // can overflow +count += _mm_popcnt_u32(bitmask_belongs_to_difference); +i_a += vectorlength; +} +// at this point we should have i_a == st_a and i_b == st_b +} +// do the tail using scalar code +while (i_a < s_a && i_b < s_b) { +uint16_t a = A[i_a]; +uint16_t b = B[i_b]; +if (b < a) { +i_b++; +} else if (a < b) { +C[count] = a; +count++; +i_a++; +} else { //== +i_a++; +i_b++; +} +} +if (i_a < s_a) { +if(C == A) { +assert((size_t)count <= i_a); +if((size_t)count < i_a) { +memmove(C + count, A + i_a, sizeof(uint16_t) * (s_a - i_a)); +} +} else { +for(size_t i = 0; i < (s_a - i_a); i++) { +C[count + i] = A[i + i_a]; +} +} +count += (int32_t)(s_a - i_a); +} +return count; +} +CROARING_UNTARGET_AVX2 +#endif // CROARING_IS_X64 + + + +/** +* Branchless binary search going after 4 values at once. +* Assumes that array is sorted. +* You have that array[*index1] >= target1, array[*index12] >= target2, ... +* except when *index1 = n, in which case you know that all values in array are +* smaller than target1, and so forth. +* It has logarithmic complexity. 
+*/ +static void binarySearch4(const uint16_t *array, int32_t n, uint16_t target1, +uint16_t target2, uint16_t target3, uint16_t target4, +int32_t *index1, int32_t *index2, int32_t *index3, +int32_t *index4) { +const uint16_t *base1 = array; +const uint16_t *base2 = array; +const uint16_t *base3 = array; +const uint16_t *base4 = array; +if (n == 0) +return; +while (n > 1) { +int32_t half = n >> 1; +base1 = (base1[half] < target1) ? &base1[half] : base1; +base2 = (base2[half] < target2) ? &base2[half] : base2; +base3 = (base3[half] < target3) ? &base3[half] : base3; +base4 = (base4[half] < target4) ? &base4[half] : base4; +n -= half; +} +*index1 = (int32_t)((*base1 < target1) + base1 - array); +*index2 = (int32_t)((*base2 < target2) + base2 - array); +*index3 = (int32_t)((*base3 < target3) + base3 - array); +*index4 = (int32_t)((*base4 < target4) + base4 - array); +} + +/** +* Branchless binary search going after 2 values at once. +* Assumes that array is sorted. +* You have that array[*index1] >= target1, array[*index12] >= target2. +* except when *index1 = n, in which case you know that all values in array are +* smaller than target1, and so forth. +* It has logarithmic complexity. +*/ +static void binarySearch2(const uint16_t *array, int32_t n, uint16_t target1, +uint16_t target2, int32_t *index1, int32_t *index2) { +const uint16_t *base1 = array; +const uint16_t *base2 = array; +if (n == 0) +return; +while (n > 1) { +int32_t half = n >> 1; +base1 = (base1[half] < target1) ? &base1[half] : base1; +base2 = (base2[half] < target2) ? &base2[half] : base2; +n -= half; +} +*index1 = (int32_t)((*base1 < target1) + base1 - array); +*index2 = (int32_t)((*base2 < target2) + base2 - array); +} + +/* Computes the intersection between one small and one large set of uint16_t. + * Stores the result into buffer and return the number of elements. + * Processes the small set in blocks of 4 values calling binarySearch4 + * and binarySearch2. 
This approach can be slightly superior to a conventional + * galloping search in some instances. + */ +int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s, +const uint16_t *large, size_t size_l, +uint16_t *buffer) { +size_t pos = 0, idx_l = 0, idx_s = 0; + +if (0 == size_s) { +return 0; +} +int32_t index1 = 0, index2 = 0, index3 = 0, index4 = 0; +while ((idx_s + 4 <= size_s) && (idx_l < size_l)) { +uint16_t target1 = small[idx_s]; +uint16_t target2 = small[idx_s + 1]; +uint16_t target3 = small[idx_s + 2]; +uint16_t target4 = small[idx_s + 3]; +binarySearch4(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, target3, +target4, &index1, &index2, &index3, &index4); +if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) { +buffer[pos++] = target1; +} +if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) { +buffer[pos++] = target2; +} +if ((index3 + idx_l < size_l) && (large[idx_l + index3] == target3)) { +buffer[pos++] = target3; +} +if ((index4 + idx_l < size_l) && (large[idx_l + index4] == target4)) { +buffer[pos++] = target4; +} +idx_s += 4; +idx_l += index4; +} +if ((idx_s + 2 <= size_s) && (idx_l < size_l)) { +uint16_t target1 = small[idx_s]; +uint16_t target2 = small[idx_s + 1]; +binarySearch2(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, &index1, +&index2); +if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) { +buffer[pos++] = target1; +} +if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) { +buffer[pos++] = target2; +} +idx_s += 2; +idx_l += index2; +} +if ((idx_s < size_s) && (idx_l < size_l)) { +uint16_t val_s = small[idx_s]; +int32_t index = binarySearch(large + idx_l, (int32_t)(size_l - idx_l), val_s); +if (index >= 0) +buffer[pos++] = val_s; +} +return (int32_t)pos; +} + + + +// TODO: this could be accelerated, possibly, by using binarySearch4 as above. 
+int32_t intersect_skewed_uint16_cardinality(const uint16_t *small, +size_t size_s, +const uint16_t *large, +size_t size_l) { +size_t pos = 0, idx_l = 0, idx_s = 0; + +if (0 == size_s) { +return 0; +} + +uint16_t val_l = large[idx_l], val_s = small[idx_s]; + +while (true) { +if (val_l < val_s) { +idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); +if (idx_l == size_l) break; +val_l = large[idx_l]; +} else if (val_s < val_l) { +idx_s++; +if (idx_s == size_s) break; +val_s = small[idx_s]; +} else { +pos++; +idx_s++; +if (idx_s == size_s) break; +val_s = small[idx_s]; +idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); +if (idx_l == size_l) break; +val_l = large[idx_l]; +} +} + +return (int32_t)pos; +} + +bool intersect_skewed_uint16_nonempty(const uint16_t *small, size_t size_s, +const uint16_t *large, size_t size_l) { +size_t idx_l = 0, idx_s = 0; + +if (0 == size_s) { +return false; +} + +uint16_t val_l = large[idx_l], val_s = small[idx_s]; + +while (true) { +if (val_l < val_s) { +idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); +if (idx_l == size_l) break; +val_l = large[idx_l]; +} else if (val_s < val_l) { +idx_s++; +if (idx_s == size_s) break; +val_s = small[idx_s]; +} else { +return true; +} +} + +return false; +} + +/** + * Generic intersection function. 
+ */ +int32_t intersect_uint16(const uint16_t *A, const size_t lenA, +const uint16_t *B, const size_t lenB, uint16_t *out) { +const uint16_t *initout = out; +if (lenA == 0 || lenB == 0) return 0; +const uint16_t *endA = A + lenA; +const uint16_t *endB = B + lenB; + +while (1) { +while (*A < *B) { +SKIP_FIRST_COMPARE: +if (++A == endA) return (int32_t)(out - initout); +} +while (*A > *B) { +if (++B == endB) return (int32_t)(out - initout); +} +if (*A == *B) { +*out++ = *A; +if (++A == endA || ++B == endB) return (int32_t)(out - initout); +} else { +goto SKIP_FIRST_COMPARE; +} +} +return (int32_t)(out - initout); // NOTREACHED +} + +int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA, +const uint16_t *B, const size_t lenB) { +int32_t answer = 0; +if (lenA == 0 || lenB == 0) return 0; +const uint16_t *endA = A + lenA; +const uint16_t *endB = B + lenB; + +while (1) { +while (*A < *B) { +SKIP_FIRST_COMPARE: +if (++A == endA) return answer; +} +while (*A > *B) { +if (++B == endB) return answer; +} +if (*A == *B) { +++answer; +if (++A == endA || ++B == endB) return answer; +} else { +goto SKIP_FIRST_COMPARE; +} +} +return answer; // NOTREACHED +} + + +bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA, +const uint16_t *B, const size_t lenB) { +if (lenA == 0 || lenB == 0) return 0; +const uint16_t *endA = A + lenA; +const uint16_t *endB = B + lenB; + +while (1) { +while (*A < *B) { +SKIP_FIRST_COMPARE: +if (++A == endA) return false; +} +while (*A > *B) { +if (++B == endB) return false; +} +if (*A == *B) { +return true; +} else { +goto SKIP_FIRST_COMPARE; +} +} +return false; // NOTREACHED +} + + + +/** + * Generic intersection function. 
+ */ +size_t intersection_uint32(const uint32_t *A, const size_t lenA, +const uint32_t *B, const size_t lenB, +uint32_t *out) { +const uint32_t *initout = out; +if (lenA == 0 || lenB == 0) return 0; +const uint32_t *endA = A + lenA; +const uint32_t *endB = B + lenB; + +while (1) { +while (*A < *B) { +SKIP_FIRST_COMPARE: +if (++A == endA) return (out - initout); +} +while (*A > *B) { +if (++B == endB) return (out - initout); +} +if (*A == *B) { +*out++ = *A; +if (++A == endA || ++B == endB) return (out - initout); +} else { +goto SKIP_FIRST_COMPARE; +} +} +return (out - initout); // NOTREACHED +} + +size_t intersection_uint32_card(const uint32_t *A, const size_t lenA, +const uint32_t *B, const size_t lenB) { +if (lenA == 0 || lenB == 0) return 0; +size_t card = 0; +const uint32_t *endA = A + lenA; +const uint32_t *endB = B + lenB; + +while (1) { +while (*A < *B) { +SKIP_FIRST_COMPARE: +if (++A == endA) return card; +} +while (*A > *B) { +if (++B == endB) return card; +} +if (*A == *B) { +card++; +if (++A == endA || ++B == endB) return card; +} else { +goto SKIP_FIRST_COMPARE; +} +} +return card; // NOTREACHED +} + +// can one vectorize the computation of the union? (Update: Yes! See +// union_vector16). 
+ +size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, +size_t size_2, uint16_t *buffer) { +size_t pos = 0, idx_1 = 0, idx_2 = 0; + +if (0 == size_2) { +memmove(buffer, set_1, size_1 * sizeof(uint16_t)); +return size_1; +} +if (0 == size_1) { +memmove(buffer, set_2, size_2 * sizeof(uint16_t)); +return size_2; +} + +uint16_t val_1 = set_1[idx_1], val_2 = set_2[idx_2]; + +while (true) { +if (val_1 < val_2) { +buffer[pos++] = val_1; +++idx_1; +if (idx_1 >= size_1) break; +val_1 = set_1[idx_1]; +} else if (val_2 < val_1) { +buffer[pos++] = val_2; +++idx_2; +if (idx_2 >= size_2) break; +val_2 = set_2[idx_2]; +} else { +buffer[pos++] = val_1; +++idx_1; +++idx_2; +if (idx_1 >= size_1 || idx_2 >= size_2) break; +val_1 = set_1[idx_1]; +val_2 = set_2[idx_2]; +} +} + +if (idx_1 < size_1) { +const size_t n_elems = size_1 - idx_1; +memmove(buffer + pos, set_1 + idx_1, n_elems * sizeof(uint16_t)); +pos += n_elems; +} else if (idx_2 < size_2) { +const size_t n_elems = size_2 - idx_2; +memmove(buffer + pos, set_2 + idx_2, n_elems * sizeof(uint16_t)); +pos += n_elems; +} + +return pos; +} + +int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2, +int length2, uint16_t *a_out) { +int out_card = 0; +int k1 = 0, k2 = 0; +if (length1 == 0) return 0; +if (length2 == 0) { +if (a1 != a_out) memcpy(a_out, a1, sizeof(uint16_t) * length1); +return length1; +} +uint16_t s1 = a1[k1]; +uint16_t s2 = a2[k2]; +while (true) { +if (s1 < s2) { +a_out[out_card++] = s1; +++k1; +if (k1 >= length1) { +break; +} +s1 = a1[k1]; +} else if (s1 == s2) { +++k1; +++k2; +if (k1 >= length1) { +break; +} +if (k2 >= length2) { +memmove(a_out + out_card, a1 + k1, +sizeof(uint16_t) * (length1 - k1)); +return out_card + length1 - k1; +} +s1 = a1[k1]; +s2 = a2[k2]; +} else { // if (val1>val2) +++k2; +if (k2 >= length2) { +memmove(a_out + out_card, a1 + k1, +sizeof(uint16_t) * (length1 - k1)); +return out_card + length1 - k1; +} +s2 = a2[k2]; +} +} +return out_card; +} + 
+int32_t xor_uint16(const uint16_t *array_1, int32_t card_1, +const uint16_t *array_2, int32_t card_2, uint16_t *out) { +int32_t pos1 = 0, pos2 = 0, pos_out = 0; +while (pos1 < card_1 && pos2 < card_2) { +const uint16_t v1 = array_1[pos1]; +const uint16_t v2 = array_2[pos2]; +if (v1 == v2) { +++pos1; +++pos2; +continue; +} +if (v1 < v2) { +out[pos_out++] = v1; +++pos1; +} else { +out[pos_out++] = v2; +++pos2; +} +} +if (pos1 < card_1) { +const size_t n_elems = card_1 - pos1; +memcpy(out + pos_out, array_1 + pos1, n_elems * sizeof(uint16_t)); +pos_out += (int32_t)n_elems; +} else if (pos2 < card_2) { +const size_t n_elems = card_2 - pos2; +memcpy(out + pos_out, array_2 + pos2, n_elems * sizeof(uint16_t)); +pos_out += (int32_t)n_elems; +} +return pos_out; +} + +#if CROARING_IS_X64 + +/*** + * start of the SIMD 16-bit union code + * + */ +CROARING_TARGET_AVX2 + +// Assuming that vInput1 and vInput2 are sorted, produces a sorted output going +// from vecMin all the way to vecMax +// developed originally for merge sort using SIMD instructions. +// Standard merge. 
See, e.g., Inoue and Taura, SIMD- and Cache-Friendly +// Algorithm for Sorting an Array of Structures +static inline void sse_merge(const __m128i *vInput1, +const __m128i *vInput2, // input 1 & 2 +__m128i *vecMin, __m128i *vecMax) { // output +__m128i vecTmp; +vecTmp = _mm_min_epu16(*vInput1, *vInput2); +*vecMax = _mm_max_epu16(*vInput1, *vInput2); +vecTmp = _mm_alignr_epi8(vecTmp, vecTmp, 2); +*vecMin = _mm_min_epu16(vecTmp, *vecMax); +*vecMax = _mm_max_epu16(vecTmp, *vecMax); +vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); +*vecMin = _mm_min_epu16(vecTmp, *vecMax); +*vecMax = _mm_max_epu16(vecTmp, *vecMax); +vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); +*vecMin = _mm_min_epu16(vecTmp, *vecMax); +*vecMax = _mm_max_epu16(vecTmp, *vecMax); +vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); +*vecMin = _mm_min_epu16(vecTmp, *vecMax); +*vecMax = _mm_max_epu16(vecTmp, *vecMax); +vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); +*vecMin = _mm_min_epu16(vecTmp, *vecMax); +*vecMax = _mm_max_epu16(vecTmp, *vecMax); +vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); +*vecMin = _mm_min_epu16(vecTmp, *vecMax); +*vecMax = _mm_max_epu16(vecTmp, *vecMax); +vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); +*vecMin = _mm_min_epu16(vecTmp, *vecMax); +*vecMax = _mm_max_epu16(vecTmp, *vecMax); +*vecMin = _mm_alignr_epi8(*vecMin, *vecMin, 2); +} +CROARING_UNTARGET_AVX2 +// used by store_unique, generated by simdunion.py +static uint8_t uniqshuf[] = { +0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, +0xc, 0xd, 0xe, 0xf, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, +0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, +0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, +0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, +0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, +0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 
0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, +0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, +0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, +0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, +0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, +0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, +0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, +0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, +0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, +0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, +0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, +0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, +0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, +0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, +0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, +0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, +0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 
+0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, +0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, +0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, +0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, +0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, +0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x4, 
0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, +0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, +0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, +0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, +0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 
+0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, +0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, +0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, +0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, +0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xe, 0xf, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xe, 0xf, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, +0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, +0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, +0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, +0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, +0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, +0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, +0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 
0x8, 0x9, 0xa, 0xb, 0xc, 0xd, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, +0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, +0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, +0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, +0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, +0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, +0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, +0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, +0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, +0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, +0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, +0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xc, 0xd, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, +0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, +0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, +0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xc, 0xd, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, +0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
+0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xc, 0xd, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, +0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, +0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, +0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, +0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, +0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, +0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, +0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, +0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, +0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, +0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, +0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, +0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, +0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, +0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, +0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x2, 0x3, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0x0, 0x1, 0x2, 0x3, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +0xFF, 0xFF, 0xFF, 0xFF}; +CROARING_TARGET_AVX2 +// write vector new, while omitting repeated values assuming that previously +// written vector was "old" +static inline int store_unique(__m128i old, __m128i newval, uint16_t *output) { +__m128i vecTmp = _mm_alignr_epi8(newval, old, 16 - 2); +// lots of high latency instructions follow (optimize?) +int M = _mm_movemask_epi8( +_mm_packs_epi16(_mm_cmpeq_epi16(vecTmp, newval), _mm_setzero_si128())); +int numberofnewvalues = 8 - _mm_popcnt_u32(M); +__m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M); +__m128i val = _mm_shuffle_epi8(newval, key); +_mm_storeu_si128((__m128i *)output, val); +return numberofnewvalues; +} +CROARING_UNTARGET_AVX2 + +// working in-place, this function overwrites the repeated values +// could be avoided? +static inline uint32_t unique(uint16_t *out, uint32_t len) { +uint32_t pos = 1; +for (uint32_t i = 1; i < len; ++i) { +if (out[i] != out[i - 1]) { +out[pos++] = out[i]; +} +} +return pos; +} + +// use with qsort, could be avoided +static int uint16_compare(const void *a, const void *b) { +return (*(uint16_t *)a - *(uint16_t *)b); +} + +CROARING_TARGET_AVX2 +// a one-pass SSE union algorithm +// This function may not be safe if array1 == output or array2 == output. 
+uint32_t union_vector16(const uint16_t *__restrict__ array1, uint32_t length1, +const uint16_t *__restrict__ array2, uint32_t length2, +uint16_t *__restrict__ output) { +if ((length1 < 8) || (length2 < 8)) { +return (uint32_t)union_uint16(array1, length1, array2, length2, output); +} +__m128i vA, vB, V, vecMin, vecMax; +__m128i laststore; +uint16_t *initoutput = output; +uint32_t len1 = length1 / 8; +uint32_t len2 = length2 / 8; +uint32_t pos1 = 0; +uint32_t pos2 = 0; +// we start the machine +vA = _mm_lddqu_si128((const __m128i *)array1 + pos1); +pos1++; +vB = _mm_lddqu_si128((const __m128i *)array2 + pos2); +pos2++; +sse_merge(&vA, &vB, &vecMin, &vecMax); +laststore = _mm_set1_epi16(-1); +output += store_unique(laststore, vecMin, output); +laststore = vecMin; +if ((pos1 < len1) && (pos2 < len2)) { +uint16_t curA, curB; +curA = array1[8 * pos1]; +curB = array2[8 * pos2]; +while (true) { +if (curA <= curB) { +V = _mm_lddqu_si128((const __m128i *)array1 + pos1); +pos1++; +if (pos1 < len1) { +curA = array1[8 * pos1]; +} else { +break; +} +} else { +V = _mm_lddqu_si128((const __m128i *)array2 + pos2); +pos2++; +if (pos2 < len2) { +curB = array2[8 * pos2]; +} else { +break; +} +} +sse_merge(&V, &vecMax, &vecMin, &vecMax); +output += store_unique(laststore, vecMin, output); +laststore = vecMin; +} +sse_merge(&V, &vecMax, &vecMin, &vecMax); +output += store_unique(laststore, vecMin, output); +laststore = vecMin; +} +// we finish the rest off using a scalar algorithm +// could be improved? 
+// +// copy the small end on a tmp buffer +uint32_t len = (uint32_t)(output - initoutput); +uint16_t buffer[16]; +uint32_t leftoversize = store_unique(laststore, vecMax, buffer); +if (pos1 == len1) { +memcpy(buffer + leftoversize, array1 + 8 * pos1, +(length1 - 8 * len1) * sizeof(uint16_t)); +leftoversize += length1 - 8 * len1; +qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); + +leftoversize = unique(buffer, leftoversize); +len += (uint32_t)union_uint16(buffer, leftoversize, array2 + 8 * pos2, +length2 - 8 * pos2, output); +} else { +memcpy(buffer + leftoversize, array2 + 8 * pos2, +(length2 - 8 * len2) * sizeof(uint16_t)); +leftoversize += length2 - 8 * len2; +qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); +leftoversize = unique(buffer, leftoversize); +len += (uint32_t)union_uint16(buffer, leftoversize, array1 + 8 * pos1, +length1 - 8 * pos1, output); +} +return len; +} +CROARING_UNTARGET_AVX2 + +/** + * End of the SIMD 16-bit union code + * + */ + +/** + * Start of SIMD 16-bit XOR code + */ + +CROARING_TARGET_AVX2 +// write vector new, while omitting repeated values assuming that previously +// written vector was "old" +static inline int store_unique_xor(__m128i old, __m128i newval, +uint16_t *output) { +__m128i vecTmp1 = _mm_alignr_epi8(newval, old, 16 - 4); +__m128i vecTmp2 = _mm_alignr_epi8(newval, old, 16 - 2); +__m128i equalleft = _mm_cmpeq_epi16(vecTmp2, vecTmp1); +__m128i equalright = _mm_cmpeq_epi16(vecTmp2, newval); +__m128i equalleftoright = _mm_or_si128(equalleft, equalright); +int M = _mm_movemask_epi8( +_mm_packs_epi16(equalleftoright, _mm_setzero_si128())); +int numberofnewvalues = 8 - _mm_popcnt_u32(M); +__m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M); +__m128i val = _mm_shuffle_epi8(vecTmp2, key); +_mm_storeu_si128((__m128i *)output, val); +return numberofnewvalues; +} +CROARING_UNTARGET_AVX2 + +// working in-place, this function overwrites the repeated values +// could be avoided? 
Warning: assumes len > 0 +static inline uint32_t unique_xor(uint16_t *out, uint32_t len) { +uint32_t pos = 1; +for (uint32_t i = 1; i < len; ++i) { +if (out[i] != out[i - 1]) { +out[pos++] = out[i]; +} else +pos--; // if it is identical to previous, delete it +} +return pos; +} +CROARING_TARGET_AVX2 +// a one-pass SSE xor algorithm +uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1, +const uint16_t *__restrict__ array2, uint32_t length2, +uint16_t *__restrict__ output) { +if ((length1 < 8) || (length2 < 8)) { +return xor_uint16(array1, length1, array2, length2, output); +} +__m128i vA, vB, V, vecMin, vecMax; +__m128i laststore; +uint16_t *initoutput = output; +uint32_t len1 = length1 / 8; +uint32_t len2 = length2 / 8; +uint32_t pos1 = 0; +uint32_t pos2 = 0; +// we start the machine +vA = _mm_lddqu_si128((const __m128i *)array1 + pos1); +pos1++; +vB = _mm_lddqu_si128((const __m128i *)array2 + pos2); +pos2++; +sse_merge(&vA, &vB, &vecMin, &vecMax); +laststore = _mm_set1_epi16(-1); +uint16_t buffer[17]; +output += store_unique_xor(laststore, vecMin, output); + +laststore = vecMin; +if ((pos1 < len1) && (pos2 < len2)) { +uint16_t curA, curB; +curA = array1[8 * pos1]; +curB = array2[8 * pos2]; +while (true) { +if (curA <= curB) { +V = _mm_lddqu_si128((const __m128i *)array1 + pos1); +pos1++; +if (pos1 < len1) { +curA = array1[8 * pos1]; +} else { +break; +} +} else { +V = _mm_lddqu_si128((const __m128i *)array2 + pos2); +pos2++; +if (pos2 < len2) { +curB = array2[8 * pos2]; +} else { +break; +} +} +sse_merge(&V, &vecMax, &vecMin, &vecMax); +// conditionally stores the last value of laststore as well as all +// but the +// last value of vecMin +output += store_unique_xor(laststore, vecMin, output); +laststore = vecMin; +} +sse_merge(&V, &vecMax, &vecMin, &vecMax); +// conditionally stores the last value of laststore as well as all but +// the +// last value of vecMin +output += store_unique_xor(laststore, vecMin, output); +laststore = vecMin; +} 
+uint32_t len = (uint32_t)(output - initoutput); + +// we finish the rest off using a scalar algorithm +// could be improved? +// conditionally stores the last value of laststore as well as all but the +// last value of vecMax, +// we store to "buffer" +int leftoversize = store_unique_xor(laststore, vecMax, buffer); +uint16_t vec7 = _mm_extract_epi16(vecMax, 7); +uint16_t vec6 = _mm_extract_epi16(vecMax, 6); +if (vec7 != vec6) buffer[leftoversize++] = vec7; +if (pos1 == len1) { +memcpy(buffer + leftoversize, array1 + 8 * pos1, +(length1 - 8 * len1) * sizeof(uint16_t)); +leftoversize += length1 - 8 * len1; +if (leftoversize == 0) { // trivial case +memcpy(output, array2 + 8 * pos2, +(length2 - 8 * pos2) * sizeof(uint16_t)); +len += (length2 - 8 * pos2); +} else { +qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); +leftoversize = unique_xor(buffer, leftoversize); +len += xor_uint16(buffer, leftoversize, array2 + 8 * pos2, +length2 - 8 * pos2, output); +} +} else { +memcpy(buffer + leftoversize, array2 + 8 * pos2, +(length2 - 8 * len2) * sizeof(uint16_t)); +leftoversize += length2 - 8 * len2; +if (leftoversize == 0) { // trivial case +memcpy(output, array1 + 8 * pos1, +(length1 - 8 * pos1) * sizeof(uint16_t)); +len += (length1 - 8 * pos1); +} else { +qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); +leftoversize = unique_xor(buffer, leftoversize); +len += xor_uint16(buffer, leftoversize, array1 + 8 * pos1, +length1 - 8 * pos1, output); +} +} +return len; +} +CROARING_UNTARGET_AVX2 +/** + * End of SIMD 16-bit XOR code + */ + +#endif // CROARING_IS_X64 + +size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2, +size_t size_2, uint32_t *buffer) { +size_t pos = 0, idx_1 = 0, idx_2 = 0; + +if (0 == size_2) { +memmove(buffer, set_1, size_1 * sizeof(uint32_t)); +return size_1; +} +if (0 == size_1) { +memmove(buffer, set_2, size_2 * sizeof(uint32_t)); +return size_2; +} + +uint32_t val_1 = set_1[idx_1], val_2 = set_2[idx_2]; 
+ +while (true) { +if (val_1 < val_2) { +buffer[pos++] = val_1; +++idx_1; +if (idx_1 >= size_1) break; +val_1 = set_1[idx_1]; +} else if (val_2 < val_1) { +buffer[pos++] = val_2; +++idx_2; +if (idx_2 >= size_2) break; +val_2 = set_2[idx_2]; +} else { +buffer[pos++] = val_1; +++idx_1; +++idx_2; +if (idx_1 >= size_1 || idx_2 >= size_2) break; +val_1 = set_1[idx_1]; +val_2 = set_2[idx_2]; +} +} + +if (idx_1 < size_1) { +const size_t n_elems = size_1 - idx_1; +memmove(buffer + pos, set_1 + idx_1, n_elems * sizeof(uint32_t)); +pos += n_elems; +} else if (idx_2 < size_2) { +const size_t n_elems = size_2 - idx_2; +memmove(buffer + pos, set_2 + idx_2, n_elems * sizeof(uint32_t)); +pos += n_elems; +} + +return pos; +} + +size_t union_uint32_card(const uint32_t *set_1, size_t size_1, +const uint32_t *set_2, size_t size_2) { +size_t pos = 0, idx_1 = 0, idx_2 = 0; + +if (0 == size_2) { +return size_1; +} +if (0 == size_1) { +return size_2; +} + +uint32_t val_1 = set_1[idx_1], val_2 = set_2[idx_2]; + +while (true) { +if (val_1 < val_2) { +++idx_1; +++pos; +if (idx_1 >= size_1) break; +val_1 = set_1[idx_1]; +} else if (val_2 < val_1) { +++idx_2; +++pos; +if (idx_2 >= size_2) break; +val_2 = set_2[idx_2]; +} else { +++idx_1; +++idx_2; +++pos; +if (idx_1 >= size_1 || idx_2 >= size_2) break; +val_1 = set_1[idx_1]; +val_2 = set_2[idx_2]; +} +} + +if (idx_1 < size_1) { +const size_t n_elems = size_1 - idx_1; +pos += n_elems; +} else if (idx_2 < size_2) { +const size_t n_elems = size_2 - idx_2; +pos += n_elems; +} +return pos; +} + + + +size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, +size_t size_2, uint16_t *buffer) { +#if CROARING_IS_X64 +if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) { +// compute union with smallest array first +if (size_1 < size_2) { +return union_vector16(set_1, (uint32_t)size_1, +set_2, (uint32_t)size_2, buffer); +} else { +return union_vector16(set_2, (uint32_t)size_2, +set_1, (uint32_t)size_1, buffer); +} +} 
else { +// compute union with smallest array first +if (size_1 < size_2) { +return union_uint16( +set_1, size_1, set_2, size_2, buffer); +} else { +return union_uint16( +set_2, size_2, set_1, size_1, buffer); +} +} +#else +// compute union with smallest array first +if (size_1 < size_2) { +return union_uint16( +set_1, size_1, set_2, size_2, buffer); +} else { +return union_uint16( +set_2, size_2, set_1, size_1, buffer); +} +#endif +} +#if CROARING_IS_X64 +#if CROARING_COMPILER_SUPPORTS_AVX512 +CROARING_TARGET_AVX512 +static inline bool _avx512_memequals(const void *s1, const void *s2, size_t n) { +const uint8_t *ptr1 = (const uint8_t *)s1; +const uint8_t *ptr2 = (const uint8_t *)s2; +const uint8_t *end1 = ptr1 + n; +const uint8_t *end8 = ptr1 + ((n >> 3) << 3); +const uint8_t *end32 = ptr1 + ((n >> 5) << 5); +const uint8_t *end64 = ptr1 + ((n >> 6) << 6); + +while (ptr1 < end64){ +__m512i r1 = _mm512_loadu_si512((const __m512i*)ptr1); +__m512i r2 = _mm512_loadu_si512((const __m512i*)ptr2); + +uint64_t mask = _mm512_cmpeq_epi8_mask(r1, r2); + +if (mask != UINT64_MAX) { +return false; +} + +ptr1 += 64; +ptr2 += 64; + +} + +while (ptr1 < end32) { +__m256i r1 = _mm256_loadu_si256((const __m256i*)ptr1); +__m256i r2 = _mm256_loadu_si256((const __m256i*)ptr2); +int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2)); +if ((uint32_t)mask != UINT32_MAX) { +return false; +} +ptr1 += 32; +ptr2 += 32; +} + +while (ptr1 < end8) { +uint64_t v1, v2; +memcpy(&v1,ptr1,sizeof(uint64_t)); +memcpy(&v2,ptr2,sizeof(uint64_t)); +if (v1 != v2) { +return false; +} +ptr1 += 8; +ptr2 += 8; +} + +while (ptr1 < end1) { +if (*ptr1 != *ptr2) { +return false; +} +ptr1++; +ptr2++; +} + +return true; +} +CROARING_UNTARGET_AVX512 +#endif // CROARING_COMPILER_SUPPORTS_AVX512 + +CROARING_TARGET_AVX2 +static inline bool _avx2_memequals(const void *s1, const void *s2, size_t n) { +const uint8_t *ptr1 = (const uint8_t *)s1; +const uint8_t *ptr2 = (const uint8_t *)s2; +const uint8_t *end1 = ptr1 + n; 
+const uint8_t *end8 = ptr1 + n/8*8; +const uint8_t *end32 = ptr1 + n/32*32; + +while (ptr1 < end32) { +__m256i r1 = _mm256_loadu_si256((const __m256i*)ptr1); +__m256i r2 = _mm256_loadu_si256((const __m256i*)ptr2); +int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2)); +if ((uint32_t)mask != UINT32_MAX) { +return false; +} +ptr1 += 32; +ptr2 += 32; +} + +while (ptr1 < end8) { +uint64_t v1, v2; +memcpy(&v1,ptr1,sizeof(uint64_t)); +memcpy(&v2,ptr2,sizeof(uint64_t)); +if (v1 != v2) { +return false; +} +ptr1 += 8; +ptr2 += 8; +} + +while (ptr1 < end1) { +if (*ptr1 != *ptr2) { +return false; +} +ptr1++; +ptr2++; +} + +return true; +} +CROARING_UNTARGET_AVX2 +#endif + +bool memequals(const void *s1, const void *s2, size_t n) { +if (n == 0) { +return true; +} +#if CROARING_IS_X64 +int support = croaring_hardware_support(); +#if CROARING_COMPILER_SUPPORTS_AVX512 +if( support & ROARING_SUPPORTS_AVX512 ) { +return _avx512_memequals(s1, s2, n); +} else +#endif // CROARING_COMPILER_SUPPORTS_AVX512 +if( support & ROARING_SUPPORTS_AVX2 ) { +return _avx2_memequals(s1, s2, n); +} else { +return memcmp(s1, s2, n) == 0; +} +#else +return memcmp(s1, s2, n) == 0; +#endif +} + + +#if CROARING_IS_X64 +#if CROARING_COMPILER_SUPPORTS_AVX512 +CROARING_TARGET_AVX512 +ALLOW_UNALIGNED +int avx512_array_container_to_uint32_array(void *vout, const uint16_t* array, size_t cardinality, +uint32_t base) { +int outpos = 0; +uint32_t *out = (uint32_t *)vout; +size_t i = 0; +for ( ;i + sizeof(__m256i)/sizeof(uint16_t) <= cardinality; i += sizeof(__m256i)/sizeof(uint16_t)) { +__m256i vinput = _mm256_loadu_si256((const __m256i*) (array + i)); +__m512i voutput = _mm512_add_epi32(_mm512_cvtepu16_epi32(vinput), _mm512_set1_epi32(base)); +_mm512_storeu_si512((__m512i*)(out + outpos), voutput); +outpos += sizeof(__m512i)/sizeof(uint32_t); +} +for ( ; i < cardinality; ++i) { +const uint32_t val = base + array[i]; +memcpy(out + outpos, &val, +sizeof(uint32_t)); // should be compiled as a MOV on x64 
+outpos++; +} +return outpos; +} +CROARING_UNTARGET_AVX512 +#endif // #if CROARING_COMPILER_SUPPORTS_AVX512 +#endif // #if CROARING_IS_X64 + + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/array_util.c */ +/* begin file src/bitset.c */ +#include +#include +#include +#include +#include + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +extern inline void bitset_print(const bitset_t *b); +extern inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator, +void *ptr); +extern inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer, +size_t capacity, size_t *startfrom); +extern inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag); +extern inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i); +extern inline void bitset_set(bitset_t *bitset, size_t i); +extern inline bool bitset_get(const bitset_t *bitset, size_t i); +extern inline size_t bitset_size_in_words(const bitset_t *bitset); +extern inline size_t bitset_size_in_bits(const bitset_t *bitset); +extern inline size_t bitset_size_in_bytes(const bitset_t *bitset); + + +/* Create a new bitset. Return NULL in case of failure. */ +bitset_t *bitset_create(void) { +bitset_t *bitset = NULL; +/* Allocate the bitset itself. */ +if ((bitset = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) { +return NULL; +} +bitset->array = NULL; +bitset->arraysize = 0; +bitset->capacity = 0; +return bitset; +} + +/* Create a new bitset able to contain size bits. Return NULL in case of + * failure. */ +bitset_t *bitset_create_with_capacity(size_t size) { +bitset_t *bitset = NULL; +/* Allocate the bitset itself. 
*/ +if ((bitset = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) { +return NULL; +} +bitset->arraysize = +(size + sizeof(uint64_t) * 8 - 1) / (sizeof(uint64_t) * 8); +bitset->capacity = bitset->arraysize; +if ((bitset->array = +(uint64_t *)roaring_calloc(bitset->arraysize, sizeof(uint64_t))) == NULL) { +roaring_free(bitset); +return NULL; +} +return bitset; +} + +/* Create a copy */ +bitset_t *bitset_copy(const bitset_t *bitset) { +bitset_t *copy = NULL; +/* Allocate the bitset itself. */ +if ((copy = (bitset_t *)roaring_malloc(sizeof(bitset_t))) == NULL) { +return NULL; +} +memcpy(copy, bitset, sizeof(bitset_t)); +copy->capacity = copy->arraysize; +if ((copy->array = (uint64_t *)roaring_malloc(sizeof(uint64_t) * +bitset->arraysize)) == NULL) { +roaring_free(copy); +return NULL; +} +memcpy(copy->array, bitset->array, sizeof(uint64_t) * bitset->arraysize); +return copy; +} + +void bitset_clear(bitset_t *bitset) { +memset(bitset->array, 0, sizeof(uint64_t) * bitset->arraysize); +} + +void bitset_fill(bitset_t *bitset) { +memset(bitset->array, 0xff, sizeof(uint64_t) * bitset->arraysize); +} + +void bitset_shift_left(bitset_t *bitset, size_t s) { +size_t extra_words = s / 64; +int inword_shift = s % 64; +size_t as = bitset->arraysize; +if (inword_shift == 0) { +bitset_resize(bitset, as + extra_words, false); +// could be done with a memmove +for (size_t i = as + extra_words; i > extra_words; i--) { +bitset->array[i - 1] = bitset->array[i - 1 - extra_words]; +} +} else { +bitset_resize(bitset, as + extra_words + 1, true); +bitset->array[as + extra_words] = +bitset->array[as - 1] >> (64 - inword_shift); +for (size_t i = as + extra_words; i >= extra_words + 2; i--) { +bitset->array[i - 1] = +(bitset->array[i - 1 - extra_words] << inword_shift) | +(bitset->array[i - 2 - extra_words] >> (64 - inword_shift)); +} +bitset->array[extra_words] = bitset->array[0] << inword_shift; +} +for (size_t i = 0; i < extra_words; i++) { +bitset->array[i] = 0; +} +} + +void 
bitset_shift_right(bitset_t *bitset, size_t s) { +size_t extra_words = s / 64; +int inword_shift = s % 64; +size_t as = bitset->arraysize; +if (inword_shift == 0) { +// could be done with a memmove +for (size_t i = 0; i < as - extra_words; i++) { +bitset->array[i] = bitset->array[i + extra_words]; +} +bitset_resize(bitset, as - extra_words, false); + +} else { +for (size_t i = 0; i + extra_words + 1 < as; i++) { +bitset->array[i] = +(bitset->array[i + extra_words] >> inword_shift) | +(bitset->array[i + extra_words + 1] << (64 - inword_shift)); +} +bitset->array[as - extra_words - 1] = +(bitset->array[as - 1] >> inword_shift); +bitset_resize(bitset, as - extra_words, false); +} +} + +/* Free memory. */ +void bitset_free(bitset_t *bitset) { +if(bitset == NULL) { return; } +roaring_free(bitset->array); +roaring_free(bitset); +} + +/* Resize the bitset so that it can support newarraysize * 64 bits. Return true + * in case of success, false for failure. */ +bool bitset_resize(bitset_t *bitset, size_t newarraysize, bool padwithzeroes) { +if(newarraysize > SIZE_MAX/64) { return false; } +size_t smallest = +newarraysize < bitset->arraysize ? newarraysize : bitset->arraysize; +if (bitset->capacity < newarraysize) { +uint64_t *newarray; +size_t newcapacity = bitset->capacity; +if(newcapacity == 0) { newcapacity = 1; } +while(newcapacity < newarraysize) { newcapacity *= 2; } +if ((newarray = (uint64_t *) roaring_realloc(bitset->array, sizeof(uint64_t) * newcapacity)) == NULL) { +return false; +} +bitset->capacity = newcapacity; +bitset->array = newarray; +} +if (padwithzeroes && (newarraysize > smallest)) +memset(bitset->array + smallest, 0, +sizeof(uint64_t) * (newarraysize - smallest)); +bitset->arraysize = newarraysize; +return true; // success! 
+} + +size_t bitset_count(const bitset_t *bitset) { +size_t card = 0; +size_t k = 0; +for (; k + 7 < bitset->arraysize; k += 8) { +card += roaring_hamming(bitset->array[k]); +card += roaring_hamming(bitset->array[k + 1]); +card += roaring_hamming(bitset->array[k + 2]); +card += roaring_hamming(bitset->array[k + 3]); +card += roaring_hamming(bitset->array[k + 4]); +card += roaring_hamming(bitset->array[k + 5]); +card += roaring_hamming(bitset->array[k + 6]); +card += roaring_hamming(bitset->array[k + 7]); +} +for (; k + 3 < bitset->arraysize; k += 4) { +card += roaring_hamming(bitset->array[k]); +card += roaring_hamming(bitset->array[k + 1]); +card += roaring_hamming(bitset->array[k + 2]); +card += roaring_hamming(bitset->array[k + 3]); +} +for (; k < bitset->arraysize; k++) { +card += roaring_hamming(bitset->array[k]); +} +return card; +} + +bool bitset_inplace_union(bitset_t *CBITSET_RESTRICT b1, +const bitset_t *CBITSET_RESTRICT b2) { +size_t minlength = +b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; +for (size_t k = 0; k < minlength; ++k) { +b1->array[k] |= b2->array[k]; +} +if (b2->arraysize > b1->arraysize) { +size_t oldsize = b1->arraysize; +if (!bitset_resize(b1, b2->arraysize, false)) return false; +memcpy(b1->array + oldsize, b2->array + oldsize, +(b2->arraysize - oldsize) * sizeof(uint64_t)); +} +return true; +} + +size_t bitset_minimum(const bitset_t *bitset) { +for (size_t k = 0; k < bitset->arraysize; k++) { +uint64_t w = bitset->array[k]; +if (w != 0) { +return roaring_trailing_zeroes(w) + k * 64; +} +} +return 0; +} + +bool bitset_grow(bitset_t *bitset, size_t newarraysize) { +if(newarraysize < bitset->arraysize) { return false; } +if(newarraysize > SIZE_MAX/64) { return false; } +if (bitset->capacity < newarraysize) { +uint64_t *newarray; +size_t newcapacity = (UINT64_C(0xFFFFFFFFFFFFFFFF) >> roaring_leading_zeroes(newarraysize)) + 1; +while(newcapacity < newarraysize) { newcapacity *= 2; } +if ((newarray = (uint64_t *) roaring_realloc(bitset->array, sizeof(uint64_t) * newcapacity)) == NULL) { +return false; +} +bitset->capacity = newcapacity; +bitset->array = newarray; +} +memset(bitset->array + bitset->arraysize, 0, +sizeof(uint64_t) * (newarraysize - bitset->arraysize)); +bitset->arraysize = newarraysize; +return true; // success! +} + +size_t bitset_maximum(const bitset_t *bitset) { +for (size_t k = bitset->arraysize; k > 0; k--) { +uint64_t w = bitset->array[k - 1]; +if (w != 0) { +return 63 - roaring_leading_zeroes(w) + (k - 1) * 64; +} +} +return 0; +} + +/* Returns true if bitsets share no common elements, false otherwise. + * + * Performs early-out if common element found. */ +bool bitsets_disjoint(const bitset_t *CBITSET_RESTRICT b1, const bitset_t *CBITSET_RESTRICT b2) { +size_t minlength = +b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; + +for (size_t k = 0; k < minlength; k++) { +if ((b1->array[k] & b2->array[k]) != 0) return false; +} +return true; +} + +/* Returns true if bitsets contain at least 1 common element, false if they are + * disjoint. + * + * Performs early-out if common element found. */ +bool bitsets_intersect(const bitset_t *CBITSET_RESTRICT b1, const bitset_t *CBITSET_RESTRICT b2) { +size_t minlength = +b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; + +for (size_t k = 0; k < minlength; k++) { +if ((b1->array[k] & b2->array[k]) != 0) return true; +} +return false; +} + +/* Returns true if b has any bits set in or after b->array[starting_loc]. */ +static bool any_bits_set(const bitset_t *b, size_t starting_loc) { +if (starting_loc >= b->arraysize) { +return false; +} +for (size_t k = starting_loc; k < b->arraysize; k++) { +if (b->array[k] != 0) return true; +} +return false; +} + +/* Returns true if b1 has all of b2's bits set. + * + * Performs early out if a bit is found in b2 that is not found in b1. */ +bool bitset_contains_all(const bitset_t *CBITSET_RESTRICT b1, const bitset_t *CBITSET_RESTRICT b2) { +size_t min_size = b1->arraysize; +if(b1->arraysize > b2->arraysize) { +min_size = b2->arraysize; +} +for (size_t k = 0; k < min_size; k++) { +if ((b1->array[k] & b2->array[k]) != b2->array[k]) { +return false; +} +} +if (b2->arraysize > b1->arraysize) { +/* Need to check if b2 has any bits set beyond b1's array */ +return !any_bits_set(b2, b1->arraysize); +} +return true; +} + +size_t bitset_union_count(const bitset_t *CBITSET_RESTRICT b1, +const bitset_t *CBITSET_RESTRICT b2) { +size_t answer = 0; +size_t minlength = +b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; +size_t k = 0; +for (; k + 3 < minlength; k += 4) { +answer += roaring_hamming(b1->array[k] | b2->array[k]); +answer += roaring_hamming(b1->array[k + 1] | b2->array[k + 1]); +answer += roaring_hamming(b1->array[k + 2] | b2->array[k + 2]); +answer += roaring_hamming(b1->array[k + 3] | b2->array[k + 3]); +} +for (; k < minlength; ++k) { +answer += roaring_hamming(b1->array[k] | b2->array[k]); +} +if (b2->arraysize > b1->arraysize) { +// k is equal to b1->arraysize +for (; k + 3 < b2->arraysize; k += 4) { +answer += roaring_hamming(b2->array[k]); +answer += roaring_hamming(b2->array[k + 1]); +answer += roaring_hamming(b2->array[k + 2]); +answer += roaring_hamming(b2->array[k + 3]); +} +for (; k < b2->arraysize; ++k) { +answer += roaring_hamming(b2->array[k]); +} +} else { +// k is equal to b2->arraysize +for (; k + 3 < b1->arraysize; k += 4) { +answer += roaring_hamming(b1->array[k]); +answer += roaring_hamming(b1->array[k + 1]); +answer += roaring_hamming(b1->array[k + 2]); +answer += roaring_hamming(b1->array[k + 3]); +} +for (; k < b1->arraysize; ++k) { +answer += roaring_hamming(b1->array[k]); +} +} +return answer; +} + +void bitset_inplace_intersection(bitset_t *CBITSET_RESTRICT b1, +const bitset_t *CBITSET_RESTRICT b2) { +size_t minlength = +b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; +size_t k = 0; +for (; k < minlength; ++k) { +b1->array[k] &= b2->array[k]; +} +for (; k < b1->arraysize; ++k) { +b1->array[k] = 0; // memset could, maybe, be a tiny bit faster +} +} + +size_t bitset_intersection_count(const bitset_t *CBITSET_RESTRICT b1, +const bitset_t *CBITSET_RESTRICT b2) { +size_t answer = 0; +size_t minlength = +b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; +for (size_t k = 0; k < minlength; ++k) { +answer += roaring_hamming(b1->array[k] & b2->array[k]); +} +return answer; +} + +void bitset_inplace_difference(bitset_t *CBITSET_RESTRICT b1, +const bitset_t *CBITSET_RESTRICT b2) { +size_t minlength = +b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; +size_t k = 0; +for (; k < minlength; ++k) { +b1->array[k] &= ~(b2->array[k]); +} +} + +size_t bitset_difference_count(const bitset_t *CBITSET_RESTRICT b1, +const bitset_t *CBITSET_RESTRICT b2) { +size_t minlength = +b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; +size_t k = 0; +size_t answer = 0; +for (; k < minlength; ++k) { +answer += roaring_hamming(b1->array[k] & ~(b2->array[k])); +} +for (; k < b1->arraysize; ++k) { +answer += roaring_hamming(b1->array[k]); +} +return answer; +} + +bool bitset_inplace_symmetric_difference(bitset_t *CBITSET_RESTRICT b1, +const bitset_t *CBITSET_RESTRICT b2) { +size_t minlength = +b1->arraysize < b2->arraysize ? b1->arraysize : b2->arraysize; +size_t k = 0; +for (; k < minlength; ++k) { +b1->array[k] ^= b2->array[k]; +} +if (b2->arraysize > b1->arraysize) { +size_t oldsize = b1->arraysize; +if (!bitset_resize(b1, b2->arraysize, false)) return false; +memcpy(b1->array + oldsize, b2->array + oldsize, +(b2->arraysize - oldsize) * sizeof(uint64_t)); +} +return true; +} + +size_t bitset_symmetric_difference_count(const bitset_t *CBITSET_RESTRICT b1, +const bitset_t *CBITSET_RESTRICT b2) { +size_t minlength = +b1->arraysize < b2->arraysize ? 
b1->arraysize : b2->arraysize; +size_t k = 0; +size_t answer = 0; +for (; k < minlength; ++k) { +answer += roaring_hamming(b1->array[k] ^ b2->array[k]); +} +if (b2->arraysize > b1->arraysize) { +for (; k < b2->arraysize; ++k) { +answer += roaring_hamming(b2->array[k]); +} +} else { +for (; k < b1->arraysize; ++k) { +answer += roaring_hamming(b1->array[k]); +} +} +return answer; +} + +bool bitset_trim(bitset_t *bitset) { +size_t newsize = bitset->arraysize; +while (newsize > 0) { +if (bitset->array[newsize - 1] == 0) +newsize -= 1; +else +break; +} +if (bitset->capacity == newsize) return true; // nothing to do +uint64_t *newarray; +if ((newarray = (uint64_t *)roaring_realloc( +bitset->array, sizeof(uint64_t) * newsize)) == NULL) { +return false; +} +bitset->array = newarray; +bitset->capacity = newsize; +bitset->arraysize = newsize; +return true; +} + + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/bitset.c */ +/* begin file src/bitset_util.c */ +#include +#include +#include +#include +#include + + +#if CROARING_IS_X64 +#ifndef CROARING_COMPILER_SUPPORTS_AVX512 +#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." 
+#endif // CROARING_COMPILER_SUPPORTS_AVX512 +#endif + +#ifdef __cplusplus +using namespace ::roaring::internal; +extern "C" { namespace roaring { namespace api { +#endif + +#if CROARING_IS_X64 +static uint8_t lengthTable[256] = { +0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, +2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, +2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, +2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, +2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, +4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, +2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, +3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, +2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, +4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, +4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; +#endif + +#if CROARING_IS_X64 +ALIGNED(32) +static uint32_t vecDecodeTable[256][8] = { +{0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ +{1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ +{2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ +{1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ +{3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ +{1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ +{2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ +{1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ +{4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ +{1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ +{2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ +{1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ +{3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ +{1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ +{2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ +{1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ +{5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ +{1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ +{2, 5, 0, 0, 0, 
0, 0, 0}, /* 0x12 (00010010) */ +{1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ +{3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ +{1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ +{2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ +{1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ +{4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ +{1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ +{2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ +{1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ +{3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ +{1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ +{2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ +{1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ +{6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ +{1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ +{2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ +{1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ +{3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ +{1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ +{2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ +{1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ +{4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ +{1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ +{2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ +{1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ +{3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ +{1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ +{2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ +{1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ +{5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ +{1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ +{2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ +{1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ +{3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ +{1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ +{2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ +{1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ +{4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ +{1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ +{2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ +{1, 2, 
4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ +{3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ +{1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ +{2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ +{1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ +{7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ +{1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ +{2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ +{1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ +{3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ +{1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ +{2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ +{1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ +{4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ +{1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ +{2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ +{1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ +{3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ +{1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ +{2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ +{1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ +{5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ +{1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ +{2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ +{1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ +{3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ +{1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ +{2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ +{1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ +{4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ +{1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ +{2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ +{1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ +{3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ +{1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ +{2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ +{1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ +{6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ +{1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ +{2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ +{1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) 
*/ +{3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ +{1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ +{2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ +{1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ +{4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ +{1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ +{2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ +{1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ +{3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ +{1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ +{2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ +{1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ +{5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ +{1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ +{2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ +{1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ +{3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ +{1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ +{2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ +{1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ +{4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ +{1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ +{2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ +{1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ +{3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ +{1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ +{2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ +{1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ +{8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ +{1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ +{2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ +{1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ +{3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ +{1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ +{2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ +{1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ +{4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ +{1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ +{2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ +{1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ +{3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C 
(10001100) */ +{1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ +{2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ +{1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ +{5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ +{1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ +{2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ +{1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ +{3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ +{1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ +{2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ +{1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ +{4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ +{1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ +{2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ +{1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ +{3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ +{1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ +{2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ +{1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ +{6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ +{1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ +{2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ +{1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ +{3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ +{1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ +{2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ +{1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ +{4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ +{1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ +{2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ +{1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ +{3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ +{1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ +{2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ +{1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ +{5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ +{1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ +{2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ +{1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ +{3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ +{1, 3, 5, 6, 8, 0, 0, 
0}, /* 0xB5 (10110101) */ +{2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ +{1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ +{4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ +{1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ +{2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ +{1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ +{3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ +{1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ +{2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ +{1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ +{7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ +{1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ +{2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ +{1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ +{3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ +{1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ +{2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ +{1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ +{4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ +{1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ +{2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ +{1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ +{3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ +{1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ +{2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ +{1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ +{5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ +{1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ +{2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ +{1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ +{3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ +{1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ +{2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ +{1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ +{4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ +{1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ +{2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ +{1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ +{3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ +{1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ +{2, 3, 4, 5, 
7, 8, 0, 0}, /* 0xDE (11011110) */ +{1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ +{6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ +{1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ +{2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ +{1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ +{3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ +{1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ +{2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ +{1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ +{4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ +{1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ +{2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ +{1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ +{3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ +{1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ +{2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ +{1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ +{5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ +{1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ +{2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ +{1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ +{3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ +{1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ +{2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ +{1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ +{4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ +{1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ +{2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ +{1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ +{3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ +{1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ +{2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ +{1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ +}; + +#endif // #if CROARING_IS_X64 + +#if CROARING_IS_X64 +// same as vecDecodeTable but in 16 bits +ALIGNED(32) +static uint16_t vecDecodeTable_uint16[256][8] = { +{0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ +{1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ +{2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ +{1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 
(00000011) */ +{3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ +{1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ +{2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ +{1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ +{4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ +{1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ +{2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ +{1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ +{3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ +{1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ +{2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ +{1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ +{5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ +{1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ +{2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ +{1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ +{3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ +{1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ +{2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ +{1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ +{4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ +{1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ +{2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ +{1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ +{3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ +{1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ +{2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ +{1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ +{6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ +{1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ +{2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ +{1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ +{3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ +{1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ +{2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ +{1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ +{4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ +{1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ +{2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ +{1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ +{3, 4, 6, 0, 0, 0, 0, 
0}, /* 0x2C (00101100) */ +{1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ +{2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ +{1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ +{5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ +{1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ +{2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ +{1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ +{3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ +{1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ +{2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ +{1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ +{4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ +{1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ +{2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ +{1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ +{3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ +{1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ +{2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ +{1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ +{7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ +{1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ +{2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ +{1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ +{3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ +{1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ +{2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ +{1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ +{4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ +{1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ +{2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ +{1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ +{3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ +{1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ +{2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ +{1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ +{5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ +{1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ +{2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ +{1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ +{3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ +{1, 3, 5, 7, 
0, 0, 0, 0}, /* 0x55 (01010101) */ +{2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ +{1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ +{4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ +{1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ +{2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ +{1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ +{3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ +{1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ +{2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ +{1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ +{6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ +{1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ +{2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ +{1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ +{3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ +{1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ +{2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ +{1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ +{4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ +{1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ +{2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ +{1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ +{3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ +{1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ +{2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ +{1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ +{5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ +{1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ +{2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ +{1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ +{3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ +{1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ +{2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ +{1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ +{4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ +{1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ +{2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ +{1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ +{3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ +{1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ +{2, 
3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ +{1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ +{8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ +{1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ +{2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ +{1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ +{3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ +{1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ +{2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ +{1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ +{4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ +{1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ +{2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ +{1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ +{3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ +{1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ +{2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ +{1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ +{5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ +{1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ +{2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ +{1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ +{3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ +{1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ +{2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ +{1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ +{4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ +{1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ +{2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ +{1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ +{3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ +{1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ +{2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ +{1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ +{6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ +{1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ +{2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ +{1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ +{3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ +{1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ +{2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 
(10100110) */ +{1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ +{4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ +{1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ +{2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ +{1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ +{3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ +{1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ +{2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ +{1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ +{5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ +{1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ +{2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ +{1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ +{3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ +{1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ +{2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ +{1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ +{4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ +{1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ +{2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ +{1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ +{3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ +{1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ +{2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ +{1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ +{7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ +{1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ +{2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ +{1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ +{3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ +{1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ +{2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ +{1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ +{4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ +{1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ +{2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ +{1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ +{3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ +{1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ +{2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ +{1, 2, 3, 4, 7, 8, 0, 
0}, /* 0xCF (11001111) */ +{5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ +{1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ +{2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ +{1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ +{3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ +{1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ +{2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ +{1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ +{4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ +{1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ +{2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ +{1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ +{3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ +{1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ +{2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ +{1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ +{6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ +{1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ +{2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ +{1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ +{3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ +{1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ +{2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ +{1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ +{4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ +{1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ +{2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ +{1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ +{3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ +{1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ +{2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ +{1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ +{5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ +{1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ +{2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ +{1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ +{3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ +{1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ +{2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ +{1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ +{4, 5, 6, 7, 
8, 0, 0, 0}, /* 0xF8 (11111000) */ +{1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ +{2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ +{1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ +{3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ +{1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ +{2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ +{1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ +}; + +#endif + +#if CROARING_IS_X64 +#if CROARING_COMPILER_SUPPORTS_AVX512 +CROARING_TARGET_AVX512 +const uint8_t vbmi2_table[64] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; +size_t bitset_extract_setbits_avx512(const uint64_t *words, size_t length, uint32_t *vout, +size_t outcapacity, uint32_t base) { +uint32_t *out = (uint32_t *)vout; +uint32_t *initout = out; +uint32_t *safeout = out + outcapacity; +__m512i base_v = _mm512_set1_epi32(base); +__m512i index_table = _mm512_loadu_si512(vbmi2_table); +size_t i = 0; + +for (; (i < length) && ((out + 64) < safeout); i += 1) +{ +uint64_t v = words[i]; +__m512i vec = _mm512_maskz_compress_epi8(v, index_table); + +uint8_t advance = roaring_hamming(v); + +__m512i vbase = _mm512_add_epi32(base_v, _mm512_set1_epi32(i * 64)); +__m512i r1 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec,0)); +__m512i r2 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec,1)); +__m512i r3 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec,2)); +__m512i r4 = _mm512_cvtepi8_epi32(_mm512_extracti32x4_epi32(vec,3)); + +r1 = _mm512_add_epi32(r1, vbase); +r2 = _mm512_add_epi32(r2, vbase); +r3 = _mm512_add_epi32(r3, vbase); +r4 = _mm512_add_epi32(r4, vbase); +_mm512_storeu_si512((__m512i *)out, r1); +_mm512_storeu_si512((__m512i *)(out + 16), r2); +_mm512_storeu_si512((__m512i *)(out + 32), r3); +_mm512_storeu_si512((__m512i *)(out + 48), r4); + +out += advance; + +} + +base += i * 64; + +for (; (i < length) && (out < safeout); ++i) 
{ +uint64_t w = words[i]; +while ((w != 0) && (out < safeout)) { +uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) +int r = roaring_trailing_zeroes(w); // on x64, should compile to TZCNT +uint32_t val = r + base; +memcpy(out, &val, +sizeof(uint32_t)); // should be compiled as a MOV on x64 +out++; +w ^= t; +} +base += 64; +} + + +return out - initout; + +} + +// Reference: https://lemire.me/blog/2022/05/10/faster-bitset-decoding-using-intel-avx-512/ +size_t bitset_extract_setbits_avx512_uint16(const uint64_t *array, size_t length, +uint16_t *vout, size_t capacity, uint16_t base) { +uint16_t *out = (uint16_t *)vout; +uint16_t *initout = out; +uint16_t *safeout = vout + capacity; + +__m512i base_v = _mm512_set1_epi16(base); +__m512i index_table = _mm512_loadu_si512(vbmi2_table); +size_t i = 0; + +for (; (i < length) && ((out + 64) < safeout); i++) +{ +uint64_t v = array[i]; +__m512i vec = _mm512_maskz_compress_epi8(v, index_table); + +uint8_t advance = roaring_hamming(v); + +__m512i vbase = _mm512_add_epi16(base_v, _mm512_set1_epi16(i * 64)); +__m512i r1 = _mm512_cvtepi8_epi16(_mm512_extracti32x8_epi32(vec,0)); +__m512i r2 = _mm512_cvtepi8_epi16(_mm512_extracti32x8_epi32(vec,1)); + +r1 = _mm512_add_epi16(r1, vbase); +r2 = _mm512_add_epi16(r2, vbase); + +_mm512_storeu_si512((__m512i *)out, r1); +_mm512_storeu_si512((__m512i *)(out + 32), r2); +out += advance; + +} + +base += i * 64; + +for (; (i < length) && (out < safeout); ++i) { +uint64_t w = array[i]; +while ((w != 0) && (out < safeout)) { +uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) +int r = roaring_trailing_zeroes(w); // on x64, should compile to TZCNT +uint32_t val = r + base; +memcpy(out, &val, +sizeof(uint16_t)); +out++; +w ^= t; +} +base += 64; +} + +return out - initout; +} +CROARING_UNTARGET_AVX512 +#endif + +CROARING_TARGET_AVX2 +size_t bitset_extract_setbits_avx2(const uint64_t *words, 
size_t length, +uint32_t *out, size_t outcapacity, +uint32_t base) { +uint32_t *initout = out; +__m256i baseVec = _mm256_set1_epi32(base - 1); +__m256i incVec = _mm256_set1_epi32(64); +__m256i add8 = _mm256_set1_epi32(8); +uint32_t *safeout = out + outcapacity; +size_t i = 0; +for (; (i < length) && (out + 64 <= safeout); ++i) { +uint64_t w = words[i]; +if (w == 0) { +baseVec = _mm256_add_epi32(baseVec, incVec); +} else { +for (int k = 0; k < 4; ++k) { +uint8_t byteA = (uint8_t)w; +uint8_t byteB = (uint8_t)(w >> 8); +w >>= 16; +__m256i vecA = +_mm256_loadu_si256((const __m256i *)vecDecodeTable[byteA]); +__m256i vecB = +_mm256_loadu_si256((const __m256i *)vecDecodeTable[byteB]); +uint8_t advanceA = lengthTable[byteA]; +uint8_t advanceB = lengthTable[byteB]; +vecA = _mm256_add_epi32(baseVec, vecA); +baseVec = _mm256_add_epi32(baseVec, add8); +vecB = _mm256_add_epi32(baseVec, vecB); +baseVec = _mm256_add_epi32(baseVec, add8); +_mm256_storeu_si256((__m256i *)out, vecA); +out += advanceA; +_mm256_storeu_si256((__m256i *)out, vecB); +out += advanceB; +} +} +} +base += i * 64; +for (; (i < length) && (out < safeout); ++i) { +uint64_t w = words[i]; +while ((w != 0) && (out < safeout)) { +uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) +int r = roaring_trailing_zeroes(w); // on x64, should compile to TZCNT +uint32_t val = r + base; +memcpy(out, &val, +sizeof(uint32_t)); // should be compiled as a MOV on x64 +out++; +w ^= t; +} +base += 64; +} +return out - initout; +} +CROARING_UNTARGET_AVX2 +#endif // CROARING_IS_X64 + +size_t bitset_extract_setbits(const uint64_t *words, size_t length, +uint32_t *out, uint32_t base) { +int outpos = 0; +for (size_t i = 0; i < length; ++i) { +uint64_t w = words[i]; +while (w != 0) { +uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) +int r = roaring_trailing_zeroes(w); // on x64, should compile to TZCNT +uint32_t val = r + base; 
+memcpy(out + outpos, &val, +sizeof(uint32_t)); // should be compiled as a MOV on x64 +outpos++; +w ^= t; +} +base += 64; +} +return outpos; +} + +size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ words1, +const uint64_t * __restrict__ words2, +size_t length, uint16_t *out, +uint16_t base) { +int outpos = 0; +for (size_t i = 0; i < length; ++i) { +uint64_t w = words1[i] & words2[i]; +while (w != 0) { +uint64_t t = w & (~w + 1); +int r = roaring_trailing_zeroes(w); +out[outpos++] = r + base; +w ^= t; +} +base += 64; +} +return outpos; +} + +#if CROARING_IS_X64 +/* + * Given a bitset containing "length" 64-bit words, write out the position + * of all the set bits to "out" as 16-bit integers, values start at "base" (can + *be set to zero). + * + * The "out" pointer should be sufficient to store the actual number of bits + *set. + * + * Returns how many values were actually decoded. + * + * This function uses SSE decoding. + */ +CROARING_TARGET_AVX2 +size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length, +uint16_t *out, size_t outcapacity, +uint16_t base) { +uint16_t *initout = out; +__m128i baseVec = _mm_set1_epi16(base - 1); +__m128i incVec = _mm_set1_epi16(64); +__m128i add8 = _mm_set1_epi16(8); +uint16_t *safeout = out + outcapacity; +const int numberofbytes = 2; // process two bytes at a time +size_t i = 0; +for (; (i < length) && (out + numberofbytes * 8 <= safeout); ++i) { +uint64_t w = words[i]; +if (w == 0) { +baseVec = _mm_add_epi16(baseVec, incVec); +} else { +for (int k = 0; k < 4; ++k) { +uint8_t byteA = (uint8_t)w; +uint8_t byteB = (uint8_t)(w >> 8); +w >>= 16; +__m128i vecA = _mm_loadu_si128( +(const __m128i *)vecDecodeTable_uint16[byteA]); +__m128i vecB = _mm_loadu_si128( +(const __m128i *)vecDecodeTable_uint16[byteB]); +uint8_t advanceA = lengthTable[byteA]; +uint8_t advanceB = lengthTable[byteB]; +vecA = _mm_add_epi16(baseVec, vecA); +baseVec = _mm_add_epi16(baseVec, add8); +vecB = 
_mm_add_epi16(baseVec, vecB); +baseVec = _mm_add_epi16(baseVec, add8); +_mm_storeu_si128((__m128i *)out, vecA); +out += advanceA; +_mm_storeu_si128((__m128i *)out, vecB); +out += advanceB; +} +} +} +base += (uint16_t)(i * 64); +for (; (i < length) && (out < safeout); ++i) { +uint64_t w = words[i]; +while ((w != 0) && (out < safeout)) { +uint64_t t = w & (~w + 1); +int r = roaring_trailing_zeroes(w); +*out = r + base; +out++; +w ^= t; +} +base += 64; +} +return out - initout; +} +CROARING_UNTARGET_AVX2 +#endif + +/* + * Given a bitset containing "length" 64-bit words, write out the position + * of all the set bits to "out", values start at "base" (can be set to zero). + * + * The "out" pointer should be sufficient to store the actual number of bits + *set. + * + * Returns how many values were actually decoded. + */ +size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length, +uint16_t *out, uint16_t base) { +int outpos = 0; +for (size_t i = 0; i < length; ++i) { +uint64_t w = words[i]; +while (w != 0) { +uint64_t t = w & (~w + 1); +int r = roaring_trailing_zeroes(w); +out[outpos++] = r + base; +w ^= t; +} +base += 64; +} +return outpos; +} + +#if defined(CROARING_ASMBITMANIPOPTIMIZATION) && defined(CROARING_IS_X64) + +static inline uint64_t _asm_bitset_set_list_withcard(uint64_t *words, uint64_t card, +const uint16_t *list, uint64_t length) { +uint64_t offset, load, pos; +uint64_t shift = 6; +const uint16_t *end = list + length; +if (!length) return card; +// TODO: could unroll for performance, see bitset_set_list +// bts is not available as an intrinsic in GCC +__asm volatile( +"1:\n" +"movzwq (%[list]), %[pos]\n" +"shrx %[shift], %[pos], %[offset]\n" +"mov (%[words],%[offset],8), %[load]\n" +"bts %[pos], %[load]\n" +"mov %[load], (%[words],%[offset],8)\n" +"sbb $-1, %[card]\n" +"add $2, %[list]\n" +"cmp %[list], %[end]\n" +"jnz 1b" +: [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), +[pos] "=&r"(pos), [offset] "=&r"(offset) +: [end] 
"r"(end), [words] "r"(words), [shift] "r"(shift)); +return card; +} + +static inline void _asm_bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { +uint64_t pos; +const uint16_t *end = list + length; + +uint64_t shift = 6; +uint64_t offset; +uint64_t load; +for (; list + 3 < end; list += 4) { +pos = list[0]; +__asm volatile( +"shrx %[shift], %[pos], %[offset]\n" +"mov (%[words],%[offset],8), %[load]\n" +"bts %[pos], %[load]\n" +"mov %[load], (%[words],%[offset],8)" +: [load] "=&r"(load), [offset] "=&r"(offset) +: [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); +pos = list[1]; +__asm volatile( +"shrx %[shift], %[pos], %[offset]\n" +"mov (%[words],%[offset],8), %[load]\n" +"bts %[pos], %[load]\n" +"mov %[load], (%[words],%[offset],8)" +: [load] "=&r"(load), [offset] "=&r"(offset) +: [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); +pos = list[2]; +__asm volatile( +"shrx %[shift], %[pos], %[offset]\n" +"mov (%[words],%[offset],8), %[load]\n" +"bts %[pos], %[load]\n" +"mov %[load], (%[words],%[offset],8)" +: [load] "=&r"(load), [offset] "=&r"(offset) +: [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); +pos = list[3]; +__asm volatile( +"shrx %[shift], %[pos], %[offset]\n" +"mov (%[words],%[offset],8), %[load]\n" +"bts %[pos], %[load]\n" +"mov %[load], (%[words],%[offset],8)" +: [load] "=&r"(load), [offset] "=&r"(offset) +: [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); +} + +while (list != end) { +pos = list[0]; +__asm volatile( +"shrx %[shift], %[pos], %[offset]\n" +"mov (%[words],%[offset],8), %[load]\n" +"bts %[pos], %[load]\n" +"mov %[load], (%[words],%[offset],8)" +: [load] "=&r"(load), [offset] "=&r"(offset) +: [words] "r"(words), [shift] "r"(shift), [pos] "r"(pos)); +list++; +} +} + +static inline uint64_t _asm_bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, +uint64_t length) { +uint64_t offset, load, pos; +uint64_t shift = 6; +const uint16_t *end = list + length; +if (!length) 
return card; +// btr is not available as an intrinsic in GCC +__asm volatile( +"1:\n" +"movzwq (%[list]), %[pos]\n" +"shrx %[shift], %[pos], %[offset]\n" +"mov (%[words],%[offset],8), %[load]\n" +"btr %[pos], %[load]\n" +"mov %[load], (%[words],%[offset],8)\n" +"sbb $0, %[card]\n" +"add $2, %[list]\n" +"cmp %[list], %[end]\n" +"jnz 1b" +: [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), +[pos] "=&r"(pos), [offset] "=&r"(offset) +: [end] "r"(end), [words] "r"(words), [shift] "r"(shift) +: +/* clobbers */ "memory"); +return card; +} + +static inline uint64_t _scalar_bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, +uint64_t length) { +uint64_t offset, load, newload, pos, index; +const uint16_t *end = list + length; +while (list != end) { +pos = *(const uint16_t *)list; +offset = pos >> 6; +index = pos % 64; +load = words[offset]; +newload = load & ~(UINT64_C(1) << index); +card -= (load ^ newload) >> index; +words[offset] = newload; +list++; +} +return card; +} + +static inline uint64_t _scalar_bitset_set_list_withcard(uint64_t *words, uint64_t card, +const uint16_t *list, uint64_t length) { +uint64_t offset, load, newload, pos, index; +const uint16_t *end = list + length; +while (list != end) { +pos = *list; +offset = pos >> 6; +index = pos % 64; +load = words[offset]; +newload = load | (UINT64_C(1) << index); +card += (load ^ newload) >> index; +words[offset] = newload; +list++; +} +return card; +} + +static inline void _scalar_bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { +uint64_t offset, load, newload, pos, index; +const uint16_t *end = list + length; +while (list != end) { +pos = *list; +offset = pos >> 6; +index = pos % 64; +load = words[offset]; +newload = load | (UINT64_C(1) << index); +words[offset] = newload; +list++; +} +} + +uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, +uint64_t length) { +if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) { +return 
_asm_bitset_clear_list(words, card, list, length); +} else { +return _scalar_bitset_clear_list(words, card, list, length); +} +} + +uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, +const uint16_t *list, uint64_t length) { +if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) { +return _asm_bitset_set_list_withcard(words, card, list, length); +} else { +return _scalar_bitset_set_list_withcard(words, card, list, length); +} +} + +void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { +if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) { +_asm_bitset_set_list(words, list, length); +} else { +_scalar_bitset_set_list(words, list, length); +} +} +#else +uint64_t bitset_clear_list(uint64_t *words, uint64_t card, const uint16_t *list, +uint64_t length) { +uint64_t offset, load, newload, pos, index; +const uint16_t *end = list + length; +while (list != end) { +pos = *(const uint16_t *)list; +offset = pos >> 6; +index = pos % 64; +load = words[offset]; +newload = load & ~(UINT64_C(1) << index); +card -= (load ^ newload) >> index; +words[offset] = newload; +list++; +} +return card; +} + +uint64_t bitset_set_list_withcard(uint64_t *words, uint64_t card, +const uint16_t *list, uint64_t length) { +uint64_t offset, load, newload, pos, index; +const uint16_t *end = list + length; +while (list != end) { +pos = *list; +offset = pos >> 6; +index = pos % 64; +load = words[offset]; +newload = load | (UINT64_C(1) << index); +card += (load ^ newload) >> index; +words[offset] = newload; +list++; +} +return card; +} + +void bitset_set_list(uint64_t *words, const uint16_t *list, uint64_t length) { +uint64_t offset, load, newload, pos, index; +const uint16_t *end = list + length; +while (list != end) { +pos = *list; +offset = pos >> 6; +index = pos % 64; +load = words[offset]; +newload = load | (UINT64_C(1) << index); +words[offset] = newload; +list++; +} +} + +#endif + +/* flip specified bits */ +/* TODO: consider whether worthwhile to 
make an asm version */ + +uint64_t bitset_flip_list_withcard(uint64_t *words, uint64_t card, +const uint16_t *list, uint64_t length) { +uint64_t offset, load, newload, pos, index; +const uint16_t *end = list + length; +while (list != end) { +pos = *list; +offset = pos >> 6; +index = pos % 64; +load = words[offset]; +newload = load ^ (UINT64_C(1) << index); +// todo: is a branch here all that bad? +card += +(1 - 2 * (((UINT64_C(1) << index) & load) >> index)); // +1 or -1 +words[offset] = newload; +list++; +} +return card; +} + +void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length) { +uint64_t offset, load, newload, pos, index; +const uint16_t *end = list + length; +while (list != end) { +pos = *list; +offset = pos >> 6; +index = pos % 64; +load = words[offset]; +newload = load ^ (UINT64_C(1) << index); +words[offset] = newload; +list++; +} +} + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace api { +#endif +/* end file src/bitset_util.c */ +/* begin file src/containers/array.c */ +/* + * array.c + * + */ + +#include +#include +#include + +#if CROARING_IS_X64 +#ifndef CROARING_COMPILER_SUPPORTS_AVX512 +#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." 
+#endif // CROARING_COMPILER_SUPPORTS_AVX512 +#endif + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +extern inline uint16_t array_container_minimum(const array_container_t *arr); +extern inline uint16_t array_container_maximum(const array_container_t *arr); +extern inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x); + +extern inline int array_container_rank(const array_container_t *arr, +uint16_t x); +extern inline int array_container_get_index(const array_container_t *arr, +uint16_t x); +extern inline bool array_container_contains(const array_container_t *arr, +uint16_t pos); +extern inline int array_container_cardinality(const array_container_t *array); +extern inline bool array_container_nonzero_cardinality(const array_container_t *array); +extern inline int32_t array_container_serialized_size_in_bytes(int32_t card); +extern inline bool array_container_empty(const array_container_t *array); +extern inline bool array_container_full(const array_container_t *array); + +/* Create a new array with capacity size. Return NULL in case of failure. */ +array_container_t *array_container_create_given_capacity(int32_t size) { +array_container_t *container; + +if ((container = (array_container_t *)roaring_malloc(sizeof(array_container_t))) == +NULL) { +return NULL; +} + +if( size <= 0 ) { // we don't want to rely on malloc(0) +container->array = NULL; +} else if ((container->array = (uint16_t *)roaring_malloc(sizeof(uint16_t) * size)) == +NULL) { +roaring_free(container); +return NULL; +} + +container->capacity = size; +container->cardinality = 0; + +return container; +} + +/* Create a new array. Return NULL in case of failure. */ +array_container_t *array_container_create(void) { +return array_container_create_given_capacity(ARRAY_DEFAULT_INIT_SIZE); +} + +/* Create a new array containing all values in [min,max). 
*/ +array_container_t * array_container_create_range(uint32_t min, uint32_t max) { +array_container_t * answer = array_container_create_given_capacity(max - min + 1); +if(answer == NULL) return answer; +answer->cardinality = 0; +for(uint32_t k = min; k < max; k++) { +answer->array[answer->cardinality++] = k; +} +return answer; +} + +/* Duplicate container */ +array_container_t *array_container_clone(const array_container_t *src) { +array_container_t *newcontainer = +array_container_create_given_capacity(src->capacity); +if (newcontainer == NULL) return NULL; + +newcontainer->cardinality = src->cardinality; + +memcpy(newcontainer->array, src->array, +src->cardinality * sizeof(uint16_t)); + +return newcontainer; +} + +void array_container_offset(const array_container_t *c, +container_t **loc, container_t **hic, +uint16_t offset) { +array_container_t *lo = NULL, *hi = NULL; +int top, lo_cap, hi_cap; + +top = (1 << 16) - offset; + +lo_cap = count_less(c->array, c->cardinality, top); +if (loc && lo_cap) { +lo = array_container_create_given_capacity(lo_cap); +for (int i = 0; i < lo_cap; ++i) { +array_container_add(lo, c->array[i] + offset); +} +*loc = (container_t*)lo; +} + +hi_cap = c->cardinality - lo_cap; +if (hic && hi_cap) { +hi = array_container_create_given_capacity(hi_cap); +for (int i = lo_cap; i < c->cardinality; ++i) { +array_container_add(hi, c->array[i] + offset); +} +*hic = (container_t*)hi; +} +} + +int array_container_shrink_to_fit(array_container_t *src) { +if (src->cardinality == src->capacity) return 0; // nothing to do +int savings = src->capacity - src->cardinality; +src->capacity = src->cardinality; +if( src->capacity == 0) { // we do not want to rely on realloc for zero allocs +roaring_free(src->array); +src->array = NULL; +} else { +uint16_t *oldarray = src->array; +src->array = +(uint16_t *)roaring_realloc(oldarray, src->capacity * sizeof(uint16_t)); +if (src->array == NULL) roaring_free(oldarray); // should never happen? 
    }
    return savings;
}

/* Free memory. */
void array_container_free(array_container_t *arr) {
    if(arr->array != NULL) {// Jon Strabala reports that some tools complain otherwise
        roaring_free(arr->array);
        arr->array = NULL;  // pedantic
    }
    roaring_free(arr);
}

/* Growth policy: double small capacities, then 1.5x, then 1.25x — the
 * relative overhead shrinks as the container gets larger. */
static inline int32_t grow_capacity(int32_t capacity) {
    return (capacity <= 0) ? ARRAY_DEFAULT_INIT_SIZE
        : capacity < 64 ? capacity * 2
        : capacity < 1024 ? capacity * 3 / 2
        : capacity * 5 / 4;
}

/* Clamp val into [min, max]. */
static inline int32_t clamp(int32_t val, int32_t min, int32_t max) {
    return ((val < min) ? min : (val > max) ? max : val);
}

/* Grow the value buffer to at least `min` entries; existing contents are
 * kept only when `preserve` is true. On allocation failure the array
 * pointer is left NULL (see trailing comment). */
void array_container_grow(array_container_t *container, int32_t min,
                          bool preserve) {

    // never grow past 65536; stay at DEFAULT_MAX_SIZE while possible
    int32_t max = (min <= DEFAULT_MAX_SIZE ? DEFAULT_MAX_SIZE : 65536);
    int32_t new_capacity = clamp(grow_capacity(container->capacity), min, max);

    container->capacity = new_capacity;
    uint16_t *array = container->array;

    if (preserve) {
        container->array =
            (uint16_t *)roaring_realloc(array, new_capacity * sizeof(uint16_t));
        if (container->array == NULL) roaring_free(array);
    } else {
        // Jon Strabala reports that some tools complain otherwise
        if (array != NULL) {
            roaring_free(array);
        }
        container->array = (uint16_t *)roaring_malloc(new_capacity * sizeof(uint16_t));
    }

    // if realloc fails, we have container->array == NULL.
}

/* Copy one container into another. We assume that they are distinct.
 */
void array_container_copy(const array_container_t *src,
                          array_container_t *dst) {
    const int32_t cardinality = src->cardinality;
    if (cardinality > dst->capacity) {
        // no need to preserve: dst is overwritten entirely below
        array_container_grow(dst, cardinality, false);
    }

    dst->cardinality = cardinality;
    memcpy(dst->array, src->array, cardinality * sizeof(uint16_t));
}

/* Append every value in [min, max) with the given stride to arr.
 * NOTE(review): no bounds/overflow check here — relies on
 * array_container_append's own growth handling. */
void array_container_add_from_range(array_container_t *arr, uint32_t min,
                                    uint32_t max, uint16_t step) {
    for (uint32_t value = min; value < max; value += step) {
        array_container_append(arr, value);
    }
}

/* Computes the union of array1 and array2 and write the result to arrayout.
 * It is assumed that arrayout is distinct from both array1 and array2.
 */
void array_container_union(const array_container_t *array_1,
                           const array_container_t *array_2,
                           array_container_t *out) {
    const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality;
    // upper bound: the union can never exceed the sum of cardinalities
    const int32_t max_cardinality = card_1 + card_2;

    if (out->capacity < max_cardinality) {
        array_container_grow(out, max_cardinality, false);
    }
    out->cardinality = (int32_t)fast_union_uint16(array_1->array, card_1,
                                                  array_2->array, card_2, out->array);

}

/* Computes the difference of array1 and array2 and write the result
 * to array out.
 * Array out does not need to be distinct from array_1
 */
void array_container_andnot(const array_container_t *array_1,
                            const array_container_t *array_2,
                            array_container_t *out) {
    // result is at most as large as array_1
    if (out->capacity < array_1->cardinality)
        array_container_grow(out, array_1->cardinality, false);
#if CROARING_IS_X64
    // the vectorized kernel requires out to be distinct from both inputs
    if(( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) && (out != array_1) && (out != array_2)) {
        out->cardinality =
            difference_vector16(array_1->array, array_1->cardinality,
                                array_2->array, array_2->cardinality, out->array);
    } else {
        out->cardinality =
            difference_uint16(array_1->array, array_1->cardinality, array_2->array,
                              array_2->cardinality, out->array);
    }
#else
    out->cardinality =
        difference_uint16(array_1->array, array_1->cardinality, array_2->array,
                          array_2->cardinality, out->array);
#endif
}

/* Computes the symmetric difference of array1 and array2 and write the
 * result
 * to arrayout.
 * It is assumed that arrayout is distinct from both array1 and array2.
 */
void array_container_xor(const array_container_t *array_1,
                         const array_container_t *array_2,
                         array_container_t *out) {
    const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality;
    const int32_t max_cardinality = card_1 + card_2;
    if (out->capacity < max_cardinality) {
        array_container_grow(out, max_cardinality, false);
    }

#if CROARING_IS_X64
    if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {
        out->cardinality =
            xor_vector16(array_1->array, array_1->cardinality, array_2->array,
                         array_2->cardinality, out->array);
    } else {
        out->cardinality =
            xor_uint16(array_1->array, array_1->cardinality, array_2->array,
                       array_2->cardinality, out->array);
    }
#else
    out->cardinality =
        xor_uint16(array_1->array, array_1->cardinality, array_2->array,
                   array_2->cardinality, out->array);
#endif
}

/* Branch-free-ish minimum of two int32 values. */
static inline int32_t minimum_int32(int32_t a, int32_t b) {
    return (a < b) ?
a : b;
}

/* computes the intersection of array1 and array2 and write the result to
 * arrayout.
 * It is assumed that arrayout is distinct from both array1 and array2.
 * */
void array_container_intersection(const array_container_t *array1,
                                  const array_container_t *array2,
                                  array_container_t *out) {
    int32_t card_1 = array1->cardinality, card_2 = array2->cardinality,
            min_card = minimum_int32(card_1, card_2);
    const int threshold = 64;  // subject to tuning
#if CROARING_IS_X64
    // extra slack so the vectorized kernel may overwrite past min_card
    if (out->capacity < min_card) {
        array_container_grow(out, min_card + sizeof(__m128i) / sizeof(uint16_t),
                             false);
    }
#else
    if (out->capacity < min_card) {
        array_container_grow(out, min_card, false);
    }
#endif

    // very lopsided inputs: galloping (skewed) intersection wins
    if (card_1 * threshold < card_2) {
        out->cardinality = intersect_skewed_uint16(
            array1->array, card_1, array2->array, card_2, out->array);
    } else if (card_2 * threshold < card_1) {
        out->cardinality = intersect_skewed_uint16(
            array2->array, card_2, array1->array, card_1, out->array);
    } else {
#if CROARING_IS_X64
        if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {
            out->cardinality = intersect_vector16(
                array1->array, card_1, array2->array, card_2, out->array);
        } else {
            out->cardinality = intersect_uint16(array1->array, card_1,
                                                array2->array, card_2, out->array);
        }
#else
        out->cardinality = intersect_uint16(array1->array, card_1,
                                            array2->array, card_2, out->array);
#endif
    }
}

/* computes the size of the intersection of array1 and array2
 * */
int array_container_intersection_cardinality(const array_container_t *array1,
                                             const array_container_t *array2) {
    int32_t card_1 = array1->cardinality, card_2 = array2->cardinality;
    const int threshold = 64;  // subject to tuning
    if (card_1 * threshold < card_2) {
        return intersect_skewed_uint16_cardinality(array1->array, card_1,
                                                   array2->array, card_2);
    } else if (card_2 * threshold < card_1) {
        return intersect_skewed_uint16_cardinality(array2->array, card_2,
                                                   array1->array, card_1);
    } else {
#if CROARING_IS_X64
        if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {
            return intersect_vector16_cardinality(array1->array, card_1,
                                                  array2->array, card_2);
        } else {
            return intersect_uint16_cardinality(array1->array, card_1,
                                                array2->array, card_2);
        }
#else
        return intersect_uint16_cardinality(array1->array, card_1,
                                            array2->array, card_2);
#endif
    }
}

/* Returns true if the two arrays share at least one value. */
bool array_container_intersect(const array_container_t *array1,
                               const array_container_t *array2) {
    int32_t card_1 = array1->cardinality, card_2 = array2->cardinality;
    const int threshold = 64;  // subject to tuning
    if (card_1 * threshold < card_2) {
        return intersect_skewed_uint16_nonempty(
            array1->array, card_1, array2->array, card_2);
    } else if (card_2 * threshold < card_1) {
        return intersect_skewed_uint16_nonempty(
            array2->array, card_2, array1->array, card_1);
    } else {
        // we do not bother vectorizing
        return intersect_uint16_nonempty(array1->array, card_1,
                                         array2->array, card_2);
    }
}

/* computes the intersection of array1 and array2 and write the result to
 * array1.
 * */
void array_container_intersection_inplace(array_container_t *src_1,
                                          const array_container_t *src_2) {
    int32_t card_1 = src_1->cardinality, card_2 = src_2->cardinality;
    const int threshold = 64;  // subject to tuning
    // writing into src_1->array is safe: the output cursor never
    // overtakes the read cursor when intersecting in place
    if (card_1 * threshold < card_2) {
        src_1->cardinality = intersect_skewed_uint16(
            src_1->array, card_1, src_2->array, card_2, src_1->array);
    } else if (card_2 * threshold < card_1) {
        src_1->cardinality = intersect_skewed_uint16(
            src_2->array, card_2, src_1->array, card_1, src_1->array);
    } else {
#if CROARING_IS_X64
        if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
            src_1->cardinality = intersect_vector16_inplace(
                src_1->array, card_1, src_2->array, card_2);
        } else {
            src_1->cardinality = intersect_uint16(
                src_1->array, card_1, src_2->array, card_2, src_1->array);
        }
#else
        src_1->cardinality = intersect_uint16(
            src_1->array, card_1, src_2->array, card_2, src_1->array);
#endif
    }
}

/* Write every value, offset by `base`, into vout as uint32; returns the
 * number of values written. */
ALLOW_UNALIGNED
int array_container_to_uint32_array(void *vout, const array_container_t *cont,
                                    uint32_t base) {

#if CROARING_IS_X64
    int support = croaring_hardware_support();
#if CROARING_COMPILER_SUPPORTS_AVX512
    if (support & ROARING_SUPPORTS_AVX512) {
        return avx512_array_container_to_uint32_array(vout, cont->array, cont->cardinality, base);
    }
#endif
    if (support & ROARING_SUPPORTS_AVX2) {
        return array_container_to_uint32_array_vector16(vout, cont->array, cont->cardinality, base);
    }
#endif // CROARING_IS_X64
    int outpos = 0;
    uint32_t *out = (uint32_t *)vout;
    size_t i = 0;
    for ( ; i < (size_t)cont->cardinality; ++i) {
        const uint32_t val = base + cont->array[i];
        // memcpy instead of a direct store to tolerate unaligned vout
        memcpy(out + outpos, &val,
               sizeof(uint32_t));  // should be compiled as a MOV on x64
        outpos++;
    }
    return outpos;
}

/* Debug helper: print the container as {v0,v1,...}. */
void array_container_printf(const array_container_t *v) {
    if (v->cardinality == 0) {
        printf("{}");
        return;
    }
    printf("{");
    printf("%d", v->array[0]);
    for (int i = 1; i < v->cardinality; ++i) {
        printf(",%d", v->array[i]);
    }
    printf("}");
}

/* Debug helper: print comma-separated absolute (base + value) uint32
 * values, with no surrounding braces. */
void array_container_printf_as_uint32_array(const array_container_t *v,
                                            uint32_t base) {
    if (v->cardinality == 0) {
        return;
    }
    printf("%u", v->array[0] + base);
    for (int i = 1; i < v->cardinality; ++i) {
        printf(",%u", v->array[i] + base);
    }
}

/*
 * Validate the container. Returns true if valid.
 * On failure, *reason points to a static description string.
 */
bool array_container_validate(const array_container_t *v, const char **reason) {
    if (v->capacity < 0) {
        *reason = "negative capacity";
        return false;
    }
    if (v->cardinality < 0) {
        *reason = "negative cardinality";
        return false;
    }
    if (v->cardinality > v->capacity) {
        *reason = "cardinality exceeds capacity";
        return false;
    }
    if (v->cardinality > DEFAULT_MAX_SIZE) {
        *reason = "cardinality exceeds DEFAULT_MAX_SIZE";
        return false;
    }
    if (v->cardinality == 0) {
        // an empty container may legitimately have a NULL array
        return true;
    }

    if (v->array == NULL) {
        *reason = "NULL array pointer";
        return false;
    }
    // values must be sorted and free of duplicates
    uint16_t prev = v->array[0];
    for (int i = 1; i < v->cardinality; ++i) {
        if (v->array[i] <= prev) {
            *reason = "array elements not strictly increasing";
            return false;
        }
        prev = v->array[i];
    }

    return true;
}

/* Compute the number of runs */
int32_t array_container_number_of_runs(const array_container_t *ac) {
    // Can SIMD work here?
    int32_t nr_runs = 0;
    int32_t prev = -2;  // sentinel: never adjacent to a valid uint16 value
    for (const uint16_t *p = ac->array; p != ac->array + ac->cardinality; ++p) {
        if (*p != prev + 1) nr_runs++;
        prev = *p;
    }
    return nr_runs;
}

/**
 * Writes the underlying array to buf, outputs how many bytes were written.
 * The number of bytes written should be
 * array_container_size_in_bytes(container).
+ * + */ +int32_t array_container_write(const array_container_t *container, char *buf) { +memcpy(buf, container->array, container->cardinality * sizeof(uint16_t)); +return array_container_size_in_bytes(container); +} + +bool array_container_is_subset(const array_container_t *container1, +const array_container_t *container2) { +if (container1->cardinality > container2->cardinality) { +return false; +} +int i1 = 0, i2 = 0; +while (i1 < container1->cardinality && i2 < container2->cardinality) { +if (container1->array[i1] == container2->array[i2]) { +i1++; +i2++; +} else if (container1->array[i1] > container2->array[i2]) { +i2++; +} else { // container1->array[i1] < container2->array[i2] +return false; +} +} +if (i1 == container1->cardinality) { +return true; +} else { +return false; +} +} + +int32_t array_container_read(int32_t cardinality, array_container_t *container, +const char *buf) { +if (container->capacity < cardinality) { +array_container_grow(container, cardinality, false); +} +container->cardinality = cardinality; +memcpy(container->array, buf, container->cardinality * sizeof(uint16_t)); + +return array_container_size_in_bytes(container); +} + +bool array_container_iterate(const array_container_t *cont, uint32_t base, +roaring_iterator iterator, void *ptr) { +for (int i = 0; i < cont->cardinality; i++) +if (!iterator(cont->array[i] + base, ptr)) return false; +return true; +} + +bool array_container_iterate64(const array_container_t *cont, uint32_t base, +roaring_iterator64 iterator, uint64_t high_bits, +void *ptr) { +for (int i = 0; i < cont->cardinality; i++) +if (!iterator(high_bits | (uint64_t)(cont->array[i] + base), ptr)) +return false; +return true; +} + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/containers/array.c */ +/* begin file src/containers/bitset.c */ +/* + * bitset.c + * + */ +#ifndef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 200809L +#endif +#include +#include +#include 
+#include + + +#if CROARING_IS_X64 +#ifndef CROARING_COMPILER_SUPPORTS_AVX512 +#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." +#endif // CROARING_COMPILER_SUPPORTS_AVX512 +#endif + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +extern inline int bitset_container_cardinality(const bitset_container_t *bitset); +extern inline void bitset_container_set(bitset_container_t *bitset, uint16_t pos); +// unused at this time: +//extern inline void bitset_container_unset(bitset_container_t *bitset, uint16_t pos); +extern inline bool bitset_container_get(const bitset_container_t *bitset, +uint16_t pos); +extern inline int32_t bitset_container_serialized_size_in_bytes(void); +extern inline bool bitset_container_add(bitset_container_t *bitset, uint16_t pos); +extern inline bool bitset_container_remove(bitset_container_t *bitset, uint16_t pos); +extern inline bool bitset_container_contains(const bitset_container_t *bitset, +uint16_t pos); + +void bitset_container_clear(bitset_container_t *bitset) { +memset(bitset->words, 0, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); +bitset->cardinality = 0; +} + +void bitset_container_set_all(bitset_container_t *bitset) { +memset(bitset->words, INT64_C(-1), +sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); +bitset->cardinality = (1 << 16); +} + + + +/* Create a new bitset. Return NULL in case of failure. 
 */
bitset_container_t *bitset_container_create(void) {
    bitset_container_t *bitset =
        (bitset_container_t *)roaring_malloc(sizeof(bitset_container_t));

    if (!bitset) {
        return NULL;
    }

    // word buffer alignment: wide enough for the SIMD loads used later
    size_t align_size = 32;
#if CROARING_IS_X64
    int support = croaring_hardware_support();
    if ( support & ROARING_SUPPORTS_AVX512 ) {
        // sizeof(__m512i) == 64
        align_size = 64;
    }
    else {
        // sizeof(__m256i) == 32
        align_size = 32;
    }
#endif
    bitset->words = (uint64_t *)roaring_aligned_malloc(
        align_size, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
    if (!bitset->words) {
        roaring_free(bitset);
        return NULL;
    }
    bitset_container_clear(bitset);
    return bitset;
}

/* Copy one container into another. We assume that they are distinct. */
void bitset_container_copy(const bitset_container_t *source,
                           bitset_container_t *dest) {
    dest->cardinality = source->cardinality;
    memcpy(dest->words, source->words,
           sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
}

/* Set every bit in [min, max) with the given stride.
 * NOTE(review): overwrites (rather than ORs into) the touched words when
 * step divides 64, and recomputes cardinality accordingly — assumes the
 * bitset is empty in that fast path; confirm against callers. */
void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min,
                                     uint32_t max, uint16_t step) {
    if (step == 0) return;  // refuse to crash
    if ((64 % step) == 0) {  // step divides 64
        uint64_t mask = 0;  // construct the repeated mask
        for (uint32_t value = (min % step); value < 64; value += step) {
            mask |= ((uint64_t)1 << value);
        }
        uint32_t firstword = min / 64;
        uint32_t endword = (max - 1) / 64;
        bitset->cardinality = (max - min + step - 1) / step;
        if (firstword == endword) {
            bitset->words[firstword] |=
                mask & (((~UINT64_C(0)) << (min % 64)) &
                        ((~UINT64_C(0)) >> ((~max + 1) % 64)));
            return;
        }
        bitset->words[firstword] = mask & ((~UINT64_C(0)) << (min % 64));
        for (uint32_t i = firstword + 1; i < endword; i++)
            bitset->words[i] = mask;
        bitset->words[endword] = mask & ((~UINT64_C(0)) >> ((~max + 1) % 64));
    } else {
        // general stride: fall back to bit-at-a-time insertion
        for (uint32_t value = min; value < max; value += step) {
            bitset_container_add(bitset, value);
        }
    }
}

/* Free memory.
 */
void bitset_container_free(bitset_container_t *bitset) {
    if(bitset->words != NULL) {// Jon Strabala reports that some tools complain otherwise
        roaring_aligned_free(bitset->words);
        bitset->words = NULL;  // pedantic
    }
    roaring_free(bitset);
}

/* duplicate container. */
bitset_container_t *bitset_container_clone(const bitset_container_t *src) {
    bitset_container_t *bitset =
        (bitset_container_t *)roaring_malloc(sizeof(bitset_container_t));

    if (!bitset) {
        return NULL;
    }

    // mirror the alignment choice made in bitset_container_create
    size_t align_size = 32;
#if CROARING_IS_X64
    if ( croaring_hardware_support() & ROARING_SUPPORTS_AVX512 ) {
        // sizeof(__m512i) == 64
        align_size = 64;
    }
    else {
        // sizeof(__m256i) == 32
        align_size = 32;
    }
#endif
    bitset->words = (uint64_t *)roaring_aligned_malloc(
        align_size, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
    if (!bitset->words) {
        roaring_free(bitset);
        return NULL;
    }
    bitset->cardinality = src->cardinality;
    memcpy(bitset->words, src->words,
           sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
    return bitset;
}

/* Shift the whole bitset left by `offset` bits, writing the bits that stay
 * within 2^16 to *loc and the bits that wrap to *hic; each output is
 * produced only when non-NULL and the corresponding part is non-empty. */
void bitset_container_offset(const bitset_container_t *c,
                             container_t **loc, container_t **hic,
                             uint16_t offset) {
    bitset_container_t *bc = NULL;
    uint64_t val;
    uint16_t b, i, end;

    b = offset >> 6;    // whole-word part of the shift
    i = offset % 64;    // intra-word part of the shift
    end = 1024 - b;     // number of source words that land in the low part

    if (loc != NULL) {
        bc = bitset_container_create();
        if (i == 0) {
            // word-aligned shift: plain copy with a word offset
            memcpy(bc->words+b, c->words, 8*end);
        } else {
            bc->words[b] = c->words[0] << i;
            for (uint32_t k = 1; k < end; ++k) {
                // combine the high bits of word k-1 with the low bits of k
                val = c->words[k] << i;
                val |= c->words[k-1] >> (64 - i);
                bc->words[b+k] = val;
            }
        }

        bc->cardinality = bitset_container_compute_cardinality(bc);
        if (bc->cardinality != 0) {
            *loc = bc;
        }
        if (bc->cardinality == c->cardinality) {
            // nothing wrapped: the high part would be empty
            return;
        }
    }

    if (hic == NULL) {
        // Both hic and loc can't be NULL, so bc is never NULL here
        if (bc->cardinality == 0) {
            bitset_container_free(bc);
        }
        return;
    }

    if (bc == NULL || bc->cardinality != 0) {
        // the low-part container was consumed (or never made); get a fresh one
        bc = bitset_container_create();
    }

    if (i == 0) {
        memcpy(bc->words,
c->words+end, 8*b);
    } else {
        for (uint32_t k = end; k < 1024; ++k) {
            // same carry-combining as the low part, for the wrapped words
            val = c->words[k] << i;
            val |= c->words[k-1] >> (64 - i);
            bc->words[k-end] = val;
        }
        bc->words[b] = c->words[1023] >> (64 - i);
    }

    bc->cardinality = bitset_container_compute_cardinality(bc);
    if (bc->cardinality == 0) {
        bitset_container_free(bc);
        return;
    }
    *hic = bc;
}

/* Set all bits in [begin, end) and refresh the cached cardinality. */
void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin,
                                uint32_t end) {
    bitset_set_range(bitset->words, begin, end);
    bitset->cardinality =
        bitset_container_compute_cardinality(bitset);  // could be smarter
}


/* Returns true if the two bitsets share at least one set bit. */
bool bitset_container_intersect(const bitset_container_t *src_1,
                                const bitset_container_t *src_2) {
    // could vectorize, but this is probably already quite fast in practice
    const uint64_t * __restrict__ words_1 = src_1->words;
    const uint64_t * __restrict__ words_2 = src_2->words;
    for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) {
        if((words_1[i] & words_2[i]) != 0) return true;
    }
    return false;
}


#if CROARING_IS_X64
#ifndef WORDS_IN_AVX2_REG
#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
#endif
#ifndef WORDS_IN_AVX512_REG
#define WORDS_IN_AVX512_REG sizeof(__m512i) / sizeof(uint64_t)
#endif
/* Get the number of bits set (force computation) */
static inline int _scalar_bitset_container_compute_cardinality(const bitset_container_t *bitset) {
    const uint64_t *words = bitset->words;
    int32_t sum = 0;
    // unrolled by 4; BITSET_CONTAINER_SIZE_IN_WORDS is a multiple of 4
    for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) {
        sum += roaring_hamming(words[i]);
        sum += roaring_hamming(words[i + 1]);
        sum += roaring_hamming(words[i + 2]);
        sum += roaring_hamming(words[i + 3]);
    }
    return sum;
}
/* Get the number of bits set (force computation) */
int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
    int support = croaring_hardware_support();
#if CROARING_COMPILER_SUPPORTS_AVX512
    if( support & ROARING_SUPPORTS_AVX512 ) {
        return (int) avx512_vpopcount(
            (const __m512i *)bitset->words,
+BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX512_REG)); +} else +#endif // CROARING_COMPILER_SUPPORTS_AVX512 +if( support & ROARING_SUPPORTS_AVX2 ) { +return (int) avx2_harley_seal_popcount256( +(const __m256i *)bitset->words, +BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); +} else { +return _scalar_bitset_container_compute_cardinality(bitset); + +} +} + +#elif defined(CROARING_USENEON) +int bitset_container_compute_cardinality(const bitset_container_t *bitset) { +uint16x8_t n0 = vdupq_n_u16(0); +uint16x8_t n1 = vdupq_n_u16(0); +uint16x8_t n2 = vdupq_n_u16(0); +uint16x8_t n3 = vdupq_n_u16(0); +for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { +uint64x2_t c0 = vld1q_u64(&bitset->words[i + 0]); +n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); +uint64x2_t c1 = vld1q_u64(&bitset->words[i + 2]); +n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); +uint64x2_t c2 = vld1q_u64(&bitset->words[i + 4]); +n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); +uint64x2_t c3 = vld1q_u64(&bitset->words[i + 6]); +n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); +} +uint64x2_t n = vdupq_n_u64(0); +n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); +n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); +n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); +n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); +return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); +} + +#else // CROARING_IS_X64 + +/* Get the number of bits set (force computation) */ +int bitset_container_compute_cardinality(const bitset_container_t *bitset) { +const uint64_t *words = bitset->words; +int32_t sum = 0; +for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) { +sum += roaring_hamming(words[i]); +sum += roaring_hamming(words[i + 1]); +sum += roaring_hamming(words[i + 2]); +sum += roaring_hamming(words[i + 3]); +} +return sum; +} + +#endif // CROARING_IS_X64 + +#if CROARING_IS_X64 + +#define BITSET_CONTAINER_FN_REPEAT 8 +#ifndef 
WORDS_IN_AVX512_REG +#define WORDS_IN_AVX512_REG sizeof(__m512i) / sizeof(uint64_t) +#endif // WORDS_IN_AVX512_REG + +/* Computes a binary operation (eg union) on bitset1 and bitset2 and write the + result to bitsetout */ +// clang-format off +#define AVX512_BITSET_CONTAINER_FN1(before, opname, opsymbol, avx_intrinsic, \ + neon_intrinsic, after) \ + static inline int _avx512_bitset_container_##opname##_nocard( \ + const bitset_container_t *src_1, const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint8_t * __restrict__ words_1 = (const uint8_t *)src_1->words; \ + const uint8_t * __restrict__ words_2 = (const uint8_t *)src_2->words; \ + /* not using the blocking optimization for some reason*/ \ + uint8_t *out = (uint8_t*)dst->words; \ + const int innerloop = 8; \ + for (size_t i = 0; \ + i < BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX512_REG); \ + i+=innerloop) { \ + __m512i A1, A2, AO; \ + A1 = _mm512_loadu_si512((const __m512i *)(words_1)); \ + A2 = _mm512_loadu_si512((const __m512i *)(words_2)); \ + AO = avx_intrinsic(A2, A1); \ + _mm512_storeu_si512((__m512i *)out, AO); \ + A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 64)); \ + A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 64)); \ + AO = avx_intrinsic(A2, A1); \ + _mm512_storeu_si512((__m512i *)(out+64), AO); \ + A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 128)); \ + A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 128)); \ + AO = avx_intrinsic(A2, A1); \ + _mm512_storeu_si512((__m512i *)(out+128), AO); \ + A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 192)); \ + A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 192)); \ + AO = avx_intrinsic(A2, A1); \ + _mm512_storeu_si512((__m512i *)(out+192), AO); \ + A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 256)); \ + A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 256)); \ + AO = avx_intrinsic(A2, A1); \ + _mm512_storeu_si512((__m512i *)(out+256), AO); \ + A1 = _mm512_loadu_si512((const __m512i 
*)(words_1 + 320)); \ + A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 320)); \ + AO = avx_intrinsic(A2, A1); \ + _mm512_storeu_si512((__m512i *)(out+320), AO); \ + A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 384)); \ + A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 384)); \ + AO = avx_intrinsic(A2, A1); \ + _mm512_storeu_si512((__m512i *)(out+384), AO); \ + A1 = _mm512_loadu_si512((const __m512i *)(words_1 + 448)); \ + A2 = _mm512_loadu_si512((const __m512i *)(words_2 + 448)); \ + AO = avx_intrinsic(A2, A1); \ + _mm512_storeu_si512((__m512i *)(out+448), AO); \ + out+=512; \ + words_1 += 512; \ + words_2 += 512; \ + } \ + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ + return dst->cardinality; \ + } + +#define AVX512_BITSET_CONTAINER_FN2(before, opname, opsymbol, avx_intrinsic, \ + neon_intrinsic, after) \ + /* next, a version that updates cardinality*/ \ + static inline int _avx512_bitset_container_##opname(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const __m512i * __restrict__ words_1 = (const __m512i *) src_1->words; \ + const __m512i * __restrict__ words_2 = (const __m512i *) src_2->words; \ + __m512i *out = (__m512i *) dst->words; \ + dst->cardinality = (int32_t)avx512_harley_seal_popcount512andstore_##opname(words_2,\ + words_1, out,BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX512_REG)); \ + return dst->cardinality; \ + } + +#define AVX512_BITSET_CONTAINER_FN3(before, opname, opsymbol, avx_intrinsic, \ + neon_intrinsic, after) \ + /* next, a version that just computes the cardinality*/ \ + static inline int _avx512_bitset_container_##opname##_justcard( \ + const bitset_container_t *src_1, const bitset_container_t *src_2) { \ + const __m512i * __restrict__ data1 = (const __m512i *) src_1->words; \ + const __m512i * __restrict__ data2 = (const __m512i *) src_2->words; \ + return (int)avx512_harley_seal_popcount512_##opname(data2, \ + data1, BITSET_CONTAINER_SIZE_IN_WORDS / 
(WORDS_IN_AVX512_REG)); \ + } + + +// we duplicate the function because other containers use the "or" term, makes API more consistent +#if CROARING_COMPILER_SUPPORTS_AVX512 +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, or, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512) +CROARING_UNTARGET_AVX512 +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, union, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512) +CROARING_UNTARGET_AVX512 + +// we duplicate the function because other containers use the "intersection" term, makes API more consistent +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, and, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512) +CROARING_UNTARGET_AVX512 +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512) +CROARING_UNTARGET_AVX512 + +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, xor, ^, _mm512_xor_si512, veorq_u64, CROARING_UNTARGET_AVX512) +CROARING_UNTARGET_AVX512 +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_AVX512) +CROARING_UNTARGET_AVX512 + +// we duplicate the function because other containers use the "or" term, makes API more consistent +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, or, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512) +CROARING_UNTARGET_AVX512 +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, union, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512) +CROARING_UNTARGET_AVX512 + +// we duplicate the function because other containers use the "intersection" term, makes API more consistent +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, and, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512) 
+CROARING_UNTARGET_AVX512 +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512) +CROARING_UNTARGET_AVX512 + +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, xor, ^, _mm512_xor_si512, veorq_u64, CROARING_UNTARGET_AVX512) +CROARING_UNTARGET_AVX512 +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_AVX512) +CROARING_UNTARGET_AVX512 + +// we duplicate the function because other containers use the "or" term, makes API more consistent +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, or, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512) +CROARING_UNTARGET_AVX512 +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, union, |, _mm512_or_si512, vorrq_u64, CROARING_UNTARGET_AVX512) +CROARING_UNTARGET_AVX512 + +// we duplicate the function because other containers use the "intersection" term, makes API more consistent +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, and, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512) +CROARING_UNTARGET_AVX512 +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, intersection, &, _mm512_and_si512, vandq_u64, CROARING_UNTARGET_AVX512) +CROARING_UNTARGET_AVX512 + +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, xor, ^, _mm512_xor_si512, veorq_u64, CROARING_UNTARGET_AVX512) +CROARING_UNTARGET_AVX512 +CROARING_TARGET_AVX512 +AVX512_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX512, andnot, &~, _mm512_andnot_si512, vbicq_u64, CROARING_UNTARGET_AVX512) +CROARING_UNTARGET_AVX512 +#endif // CROARING_COMPILER_SUPPORTS_AVX512 + +#ifndef WORDS_IN_AVX2_REG #define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t) +#endif // WORDS_IN_AVX2_REG +#define LOOP_SIZE \ + BITSET_CONTAINER_SIZE_IN_WORDS / \ + 
((WORDS_IN_AVX2_REG)*BITSET_CONTAINER_FN_REPEAT) + +/* Computes a binary operation (eg union) on bitset1 and bitset2 and write the + result to bitsetout */ +// clang-format off +#define AVX_BITSET_CONTAINER_FN1(before, opname, opsymbol, avx_intrinsic, \ + neon_intrinsic, after) \ + static inline int _avx2_bitset_container_##opname##_nocard( \ + const bitset_container_t *src_1, const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint8_t *__restrict__ words_1 = (const uint8_t *)src_1->words; \ + const uint8_t *__restrict__ words_2 = (const uint8_t *)src_2->words; \ + /* not using the blocking optimization for some reason*/ \ + uint8_t *out = (uint8_t *)dst->words; \ + const int innerloop = 8; \ + for (size_t i = 0; \ + i < BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG); \ + i += innerloop) { \ + __m256i A1, A2, AO; \ + A1 = _mm256_lddqu_si256((const __m256i *)(words_1)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(words_2)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)out, AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 32)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 32)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out + 32), AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 64)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 64)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out + 64), AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 96)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 96)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out + 96), AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 128)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 128)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out + 128), AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 160)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 
160)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out + 160), AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 192)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 192)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out + 192), AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(words_1 + 224)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(words_2 + 224)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out + 224), AO); \ + out += 256; \ + words_1 += 256; \ + words_2 += 256; \ + } \ + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ + return dst->cardinality; \ + } + +#define AVX_BITSET_CONTAINER_FN2(before, opname, opsymbol, avx_intrinsic, \ + neon_intrinsic, after) \ + /* next, a version that updates cardinality*/ \ + static inline int _avx2_bitset_container_##opname(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const __m256i *__restrict__ words_1 = (const __m256i *)src_1->words; \ + const __m256i *__restrict__ words_2 = (const __m256i *)src_2->words; \ + __m256i *out = (__m256i *)dst->words; \ + dst->cardinality = (int32_t)avx2_harley_seal_popcount256andstore_##opname( \ + words_2, words_1, out, \ + BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); \ + return dst->cardinality; \ + } \ + +#define AVX_BITSET_CONTAINER_FN3(before, opname, opsymbol, avx_intrinsic, \ + neon_intrinsic, after) \ + /* next, a version that just computes the cardinality*/ \ + static inline int _avx2_bitset_container_##opname##_justcard( \ + const bitset_container_t *src_1, const bitset_container_t *src_2) { \ + const __m256i *__restrict__ data1 = (const __m256i *)src_1->words; \ + const __m256i *__restrict__ data2 = (const __m256i *)src_2->words; \ + return (int)avx2_harley_seal_popcount256_##opname( \ + data2, data1, BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); \ + } + + +// we duplicate the function because other containers 
use the "or" term, makes API more consistent +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 + +// we duplicate the function because other containers use the "intersection" term, makes API more consistent +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 + +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN1(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 + +// we duplicate the function because other containers use the "or" term, makes API more consistent +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 + +// we duplicate the function because other containers use the "intersection" term, makes API more consistent +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 + +CROARING_TARGET_AVX2 
+AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN2(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 + +// we duplicate the function because other containers use the "or" term, makes API more consistent +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, or, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, union, |, _mm256_or_si256, vorrq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 + +// we duplicate the function because other containers use the "intersection" term, makes API more consistent +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, and, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, intersection, &, _mm256_and_si256, vandq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 + +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, xor, ^, _mm256_xor_si256, veorq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 +CROARING_TARGET_AVX2 +AVX_BITSET_CONTAINER_FN3(CROARING_TARGET_AVX2, andnot, &~, _mm256_andnot_si256, vbicq_u64, CROARING_UNTARGET_AVX2) +CROARING_UNTARGET_AVX2 + + +#define SCALAR_BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, \ + neon_intrinsic) \ + static inline int _scalar_bitset_container_##opname(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint64_t *__restrict__ words_1 = src_1->words; \ + const uint64_t *__restrict__ words_2 = src_2->words; \ + uint64_t *out = dst->words; \ + int32_t sum = 0; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ + const uint64_t word_1 = 
(words_1[i])opsymbol(words_2[i]), \ + word_2 = (words_1[i + 1]) opsymbol(words_2[i + 1]); \ + out[i] = word_1; \ + out[i + 1] = word_2; \ + sum += roaring_hamming(word_1); \ + sum += roaring_hamming(word_2); \ + } \ + dst->cardinality = sum; \ + return dst->cardinality; \ + } \ + static inline int _scalar_bitset_container_##opname##_nocard( \ + const bitset_container_t *src_1, const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint64_t *__restrict__ words_1 = src_1->words; \ + const uint64_t *__restrict__ words_2 = src_2->words; \ + uint64_t *out = dst->words; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \ + out[i] = (words_1[i])opsymbol(words_2[i]); \ + } \ + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ + return dst->cardinality; \ + } \ + static inline int _scalar_bitset_container_##opname##_justcard( \ + const bitset_container_t *src_1, const bitset_container_t *src_2) { \ + const uint64_t *__restrict__ words_1 = src_1->words; \ + const uint64_t *__restrict__ words_2 = src_2->words; \ + int32_t sum = 0; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ + const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \ + word_2 = (words_1[i + 1]) opsymbol(words_2[i + 1]); \ + sum += roaring_hamming(word_1); \ + sum += roaring_hamming(word_2); \ + } \ + return sum; \ + } + +// we duplicate the function because other containers use the "or" term, makes API more consistent +SCALAR_BITSET_CONTAINER_FN(or, |, _mm256_or_si256, vorrq_u64) +SCALAR_BITSET_CONTAINER_FN(union, |, _mm256_or_si256, vorrq_u64) + +// we duplicate the function because other containers use the "intersection" term, makes API more consistent +SCALAR_BITSET_CONTAINER_FN(and, &, _mm256_and_si256, vandq_u64) +SCALAR_BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64) + +SCALAR_BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256, veorq_u64) +SCALAR_BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64) + +#if 
CROARING_COMPILER_SUPPORTS_AVX512 +#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \ + int bitset_container_##opname(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + int support = croaring_hardware_support(); \ + if ( support & ROARING_SUPPORTS_AVX512 ) { \ + return _avx512_bitset_container_##opname(src_1, src_2, dst); \ + } \ + else if ( support & ROARING_SUPPORTS_AVX2 ) { \ + return _avx2_bitset_container_##opname(src_1, src_2, dst); \ + } else { \ + return _scalar_bitset_container_##opname(src_1, src_2, dst); \ + } \ + } \ + int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + int support = croaring_hardware_support(); \ + if ( support & ROARING_SUPPORTS_AVX512 ) { \ + return _avx512_bitset_container_##opname##_nocard(src_1, src_2, dst); \ + } \ + else if ( support & ROARING_SUPPORTS_AVX2 ) { \ + return _avx2_bitset_container_##opname##_nocard(src_1, src_2, dst); \ + } else { \ + return _scalar_bitset_container_##opname##_nocard(src_1, src_2, dst); \ + } \ + } \ + int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ + const bitset_container_t *src_2) { \ + int support = croaring_hardware_support(); \ + if ( support & ROARING_SUPPORTS_AVX512 ) { \ + return _avx512_bitset_container_##opname##_justcard(src_1, src_2); \ + } \ + else if ( support & ROARING_SUPPORTS_AVX2 ) { \ + return _avx2_bitset_container_##opname##_justcard(src_1, src_2); \ + } else { \ + return _scalar_bitset_container_##opname##_justcard(src_1, src_2); \ + } \ + } + +#else // CROARING_COMPILER_SUPPORTS_AVX512 + + +#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \ + int bitset_container_##opname(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + if ( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) { \ + return 
_avx2_bitset_container_##opname(src_1, src_2, dst); \ + } else { \ + return _scalar_bitset_container_##opname(src_1, src_2, dst); \ + } \ + } \ + int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + if ( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) { \ + return _avx2_bitset_container_##opname##_nocard(src_1, src_2, dst); \ + } else { \ + return _scalar_bitset_container_##opname##_nocard(src_1, src_2, dst); \ + } \ + } \ + int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ + const bitset_container_t *src_2) { \ + if ( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) { \ + return _avx2_bitset_container_##opname##_justcard(src_1, src_2); \ + } else { \ + return _scalar_bitset_container_##opname##_justcard(src_1, src_2); \ + } \ + } + +#endif // CROARING_COMPILER_SUPPORTS_AVX512 + +#elif defined(CROARING_USENEON) + +#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \ +int bitset_container_##opname(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint64_t * __restrict__ words_1 = src_1->words; \ + const uint64_t * __restrict__ words_2 = src_2->words; \ + uint64_t *out = dst->words; \ + uint16x8_t n0 = vdupq_n_u16(0); \ + uint16x8_t n1 = vdupq_n_u16(0); \ + uint16x8_t n2 = vdupq_n_u16(0); \ + uint16x8_t n3 = vdupq_n_u16(0); \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \ + uint64x2_t c0 = neon_intrinsic(vld1q_u64(&words_1[i + 0]), \ + vld1q_u64(&words_2[i + 0])); \ + n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); \ + vst1q_u64(&out[i + 0], c0); \ + uint64x2_t c1 = neon_intrinsic(vld1q_u64(&words_1[i + 2]), \ + vld1q_u64(&words_2[i + 2])); \ + n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); \ + vst1q_u64(&out[i + 2], c1); \ + uint64x2_t c2 = neon_intrinsic(vld1q_u64(&words_1[i + 4]), \ + 
vld1q_u64(&words_2[i + 4])); \ + n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); \ + vst1q_u64(&out[i + 4], c2); \ + uint64x2_t c3 = neon_intrinsic(vld1q_u64(&words_1[i + 6]), \ + vld1q_u64(&words_2[i + 6])); \ + n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); \ + vst1q_u64(&out[i + 6], c3); \ + } \ + uint64x2_t n = vdupq_n_u64(0); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); \ + dst->cardinality = vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); \ + return dst->cardinality; \ +} \ +int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint64_t * __restrict__ words_1 = src_1->words; \ + const uint64_t * __restrict__ words_2 = src_2->words; \ + uint64_t *out = dst->words; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \ + vst1q_u64(&out[i + 0], neon_intrinsic(vld1q_u64(&words_1[i + 0]), \ + vld1q_u64(&words_2[i + 0]))); \ + vst1q_u64(&out[i + 2], neon_intrinsic(vld1q_u64(&words_1[i + 2]), \ + vld1q_u64(&words_2[i + 2]))); \ + vst1q_u64(&out[i + 4], neon_intrinsic(vld1q_u64(&words_1[i + 4]), \ + vld1q_u64(&words_2[i + 4]))); \ + vst1q_u64(&out[i + 6], neon_intrinsic(vld1q_u64(&words_1[i + 6]), \ + vld1q_u64(&words_2[i + 6]))); \ + } \ + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ + return dst->cardinality; \ +} \ +int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ + const bitset_container_t *src_2) { \ + const uint64_t * __restrict__ words_1 = src_1->words; \ + const uint64_t * __restrict__ words_2 = src_2->words; \ + uint16x8_t n0 = vdupq_n_u16(0); \ + uint16x8_t n1 = vdupq_n_u16(0); \ + uint16x8_t n2 = vdupq_n_u16(0); \ + uint16x8_t n3 = vdupq_n_u16(0); \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \ + 
uint64x2_t c0 = neon_intrinsic(vld1q_u64(&words_1[i + 0]), \ + vld1q_u64(&words_2[i + 0])); \ + n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); \ + uint64x2_t c1 = neon_intrinsic(vld1q_u64(&words_1[i + 2]), \ + vld1q_u64(&words_2[i + 2])); \ + n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); \ + uint64x2_t c2 = neon_intrinsic(vld1q_u64(&words_1[i + 4]), \ + vld1q_u64(&words_2[i + 4])); \ + n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); \ + uint64x2_t c3 = neon_intrinsic(vld1q_u64(&words_1[i + 6]), \ + vld1q_u64(&words_2[i + 6])); \ + n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); \ + } \ + uint64x2_t n = vdupq_n_u64(0); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); \ + return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); \ +} + +#else + +#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \ +int bitset_container_##opname(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint64_t * __restrict__ words_1 = src_1->words; \ + const uint64_t * __restrict__ words_2 = src_2->words; \ + uint64_t *out = dst->words; \ + int32_t sum = 0; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ + const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \ + word_2 = (words_1[i + 1])opsymbol(words_2[i + 1]); \ + out[i] = word_1; \ + out[i + 1] = word_2; \ + sum += roaring_hamming(word_1); \ + sum += roaring_hamming(word_2); \ + } \ + dst->cardinality = sum; \ + return dst->cardinality; \ +} \ +int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint64_t * __restrict__ words_1 = src_1->words; \ + const uint64_t * __restrict__ words_2 = 
src_2->words; \ + uint64_t *out = dst->words; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \ + out[i] = (words_1[i])opsymbol(words_2[i]); \ + } \ + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ + return dst->cardinality; \ +} \ +int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ + const bitset_container_t *src_2) { \ + printf("A1\n"); const uint64_t * __restrict__ words_1 = src_1->words; \ + const uint64_t * __restrict__ words_2 = src_2->words; \ + int32_t sum = 0; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ + const uint64_t word_1 = (words_1[i])opsymbol(words_2[i]), \ + word_2 = (words_1[i + 1])opsymbol(words_2[i + 1]); \ + sum += roaring_hamming(word_1); \ + sum += roaring_hamming(word_2); \ + } \ + return sum; \ +} + +#endif // CROARING_IS_X64 + +// we duplicate the function because other containers use the "or" term, makes API more consistent +BITSET_CONTAINER_FN(or, |, _mm256_or_si256, vorrq_u64) +BITSET_CONTAINER_FN(union, |, _mm256_or_si256, vorrq_u64) + +// we duplicate the function because other containers use the "intersection" term, makes API more consistent +BITSET_CONTAINER_FN(and, &, _mm256_and_si256, vandq_u64) +BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64) + +BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256, veorq_u64) +BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64) +// clang-format On + + +ALLOW_UNALIGNED +int bitset_container_to_uint32_array( +uint32_t *out, +const bitset_container_t *bc, +uint32_t base +){ +#if CROARING_IS_X64 +int support = croaring_hardware_support(); +#if CROARING_COMPILER_SUPPORTS_AVX512 +if(( support & ROARING_SUPPORTS_AVX512 ) && (bc->cardinality >= 8192)) // heuristic +return (int) bitset_extract_setbits_avx512(bc->words, +BITSET_CONTAINER_SIZE_IN_WORDS, out, bc->cardinality, base); +else +#endif +if(( support & ROARING_SUPPORTS_AVX2 ) && (bc->cardinality >= 8192)) // heuristic +return (int) 
bitset_extract_setbits_avx2(bc->words, +BITSET_CONTAINER_SIZE_IN_WORDS, out, bc->cardinality, base); +else +return (int) bitset_extract_setbits(bc->words, +BITSET_CONTAINER_SIZE_IN_WORDS, out, base); +#else +return (int) bitset_extract_setbits(bc->words, +BITSET_CONTAINER_SIZE_IN_WORDS, out, base); +#endif +} + +/* + * Print this container using printf (useful for debugging). + */ +void bitset_container_printf(const bitset_container_t * v) { +printf("{"); +uint32_t base = 0; +bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable +for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { +uint64_t w = v->words[i]; +while (w != 0) { +uint64_t t = w & (~w + 1); +int r = roaring_trailing_zeroes(w); +if(iamfirst) {// predicted to be false +printf("%u",base + r); +iamfirst = false; +} else { +printf(",%u",base + r); +} +w ^= t; +} +base += 64; +} +printf("}"); +} + + +/* + * Print this container using printf as a comma-separated list of 32-bit integers starting at base. + */ +void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint32_t base) { +bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable +for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { +uint64_t w = v->words[i]; +while (w != 0) { +uint64_t t = w & (~w + 1); +int r = roaring_trailing_zeroes(w); +if(iamfirst) {// predicted to be false +printf("%u", r + base); +iamfirst = false; +} else { +printf(",%u",r + base); +} +w ^= t; +} +base += 64; +} +} + +/* + * Validate the container. Returns true if valid. 
+ */ +bool bitset_container_validate(const bitset_container_t *v, const char **reason) { +if (v->words == NULL) { +*reason = "words is NULL"; +return false; +} +if (v->cardinality != bitset_container_compute_cardinality(v)) { +*reason = "cardinality is incorrect"; +return false; +} +// Attempt to forcibly load the first and last words, hopefully causing +// a segfault or an address sanitizer error if words is not allocated. +volatile uint64_t *words = v->words; +(void) words[0]; +(void) words[BITSET_CONTAINER_SIZE_IN_WORDS - 1]; +return true; +} + + +// TODO: use the fast lower bound, also +int bitset_container_number_of_runs(bitset_container_t *bc) { +int num_runs = 0; +uint64_t next_word = bc->words[0]; + +for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS-1; ++i) { +uint64_t word = next_word; +next_word = bc->words[i+1]; +num_runs += roaring_hamming((~word) & (word << 1)) + ( (word >> 63) & ~next_word); +} + +uint64_t word = next_word; +num_runs += roaring_hamming((~word) & (word << 1)); +if((word & 0x8000000000000000ULL) != 0) +num_runs++; +return num_runs; +} + + +int32_t bitset_container_write(const bitset_container_t *container, +char *buf) { +memcpy(buf, container->words, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); +return bitset_container_size_in_bytes(container); +} + + +int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container, +const char *buf) { +container->cardinality = cardinality; +memcpy(container->words, buf, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); +return bitset_container_size_in_bytes(container); +} + +bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr) { +for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { +uint64_t w = cont->words[i]; +while (w != 0) { +uint64_t t = w & (~w + 1); +int r = roaring_trailing_zeroes(w); +if(!iterator(r + base, ptr)) return false; +w ^= t; +} +base += 64; +} +return true; +} + +bool 
bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr) { +for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { +uint64_t w = cont->words[i]; +while (w != 0) { +uint64_t t = w & (~w + 1); +int r = roaring_trailing_zeroes(w); +if(!iterator(high_bits | (uint64_t)(r + base), ptr)) return false; +w ^= t; +} +base += 64; +} +return true; +} + +#if CROARING_IS_X64 +#if CROARING_COMPILER_SUPPORTS_AVX512 +CROARING_TARGET_AVX512 +ALLOW_UNALIGNED +static inline bool _avx512_bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) { +const __m512i *ptr1 = (const __m512i*)container1->words; +const __m512i *ptr2 = (const __m512i*)container2->words; +for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)/64; i++) { +__m512i r1 = _mm512_loadu_si512(ptr1+i); +__m512i r2 = _mm512_loadu_si512(ptr2+i); +__mmask64 mask = _mm512_cmpeq_epi8_mask(r1, r2); +if ((uint64_t)mask != UINT64_MAX) { +return false; +} +} +return true; +} +CROARING_UNTARGET_AVX512 +#endif // CROARING_COMPILER_SUPPORTS_AVX512 +CROARING_TARGET_AVX2 +ALLOW_UNALIGNED +static inline bool _avx2_bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) { +const __m256i *ptr1 = (const __m256i*)container1->words; +const __m256i *ptr2 = (const __m256i*)container2->words; +for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)/32; i++) { +__m256i r1 = _mm256_loadu_si256(ptr1+i); +__m256i r2 = _mm256_loadu_si256(ptr2+i); +int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2)); +if ((uint32_t)mask != UINT32_MAX) { +return false; +} +} +return true; +} +CROARING_UNTARGET_AVX2 +#endif // CROARING_IS_X64 + +ALLOW_UNALIGNED +bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) { +if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != 
BITSET_UNKNOWN_CARDINALITY)) { +if(container1->cardinality != container2->cardinality) { +return false; +} +if (container1->cardinality == INT32_C(0x10000)) { +return true; +} +} +#if CROARING_IS_X64 +int support = croaring_hardware_support(); +#if CROARING_COMPILER_SUPPORTS_AVX512 +if( support & ROARING_SUPPORTS_AVX512 ) { +return _avx512_bitset_container_equals(container1, container2); +} +else +#endif +if( support & ROARING_SUPPORTS_AVX2 ) { +return _avx2_bitset_container_equals(container1, container2); +} +#endif +return memcmp(container1->words, +container2->words, +BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)) == 0; +} + +bool bitset_container_is_subset(const bitset_container_t *container1, +const bitset_container_t *container2) { +if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) { +if(container1->cardinality > container2->cardinality) { +return false; +} +} +for(int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { +if((container1->words[i] & container2->words[i]) != container1->words[i]) { +return false; +} +} +return true; +} + +bool bitset_container_select(const bitset_container_t *container, uint32_t *start_rank, uint32_t rank, uint32_t *element) { +int card = bitset_container_cardinality(container); +if(rank >= *start_rank + card) { +*start_rank += card; +return false; +} +const uint64_t *words = container->words; +int32_t size; +for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 1) { +size = roaring_hamming(words[i]); +if(rank <= *start_rank + size) { +uint64_t w = container->words[i]; +uint16_t base = i*64; +while (w != 0) { +uint64_t t = w & (~w + 1); +int r = roaring_trailing_zeroes(w); +if(*start_rank == rank) { +*element = r+base; +return true; +} +w ^= t; +*start_rank += 1; +} +} +else +*start_rank += size; +} +assert(false); +roaring_unreachable; +} + + +/* Returns the smallest value (assumes not empty) */ +uint16_t bitset_container_minimum(const 
bitset_container_t *container) {
+for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
+uint64_t w = container->words[i];
+if (w != 0) {
+int r = roaring_trailing_zeroes(w);
+return r + i * 64;
+}
+}
+return UINT16_MAX;
+}
+
+/* Returns the largest value (assumes not empty) */
+uint16_t bitset_container_maximum(const bitset_container_t *container) {
+/* scan downward including word 0: with `i > 0` a container whose only set
+ * bits live in words[0] would fall through and wrongly report 0 */
+for (int32_t i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i >= 0; --i ) {
+uint64_t w = container->words[i];
+if (w != 0) {
+int r = roaring_leading_zeroes(w);
+return i * 64 + 63 - r;
+}
+}
+return 0; // unreachable for a non-empty container
+}
+
+/* Returns the number of values equal or smaller than x */
+int bitset_container_rank(const bitset_container_t *container, uint16_t x) {
+// credit: aqrit
+int sum = 0;
+int i = 0;
+for (int end = x / 64; i < end; i++){
+sum += roaring_hamming(container->words[i]);
+}
+uint64_t lastword = container->words[i];
+uint64_t lastpos = UINT64_C(1) << (x % 64);
+uint64_t mask = lastpos + lastpos - 1; // smear right
+sum += roaring_hamming(lastword & mask);
+return sum;
+}
+
+/* Returns the index of x, if it does not exist return -1 */
+int bitset_container_get_index(const bitset_container_t *container, uint16_t x) {
+if (bitset_container_get(container, x)) {
+// credit: aqrit
+int sum = 0;
+int i = 0;
+for (int end = x / 64; i < end; i++){
+sum += roaring_hamming(container->words[i]);
+}
+uint64_t lastword = container->words[i];
+uint64_t lastpos = UINT64_C(1) << (x % 64);
+uint64_t mask = lastpos + lastpos - 1; // smear right
+sum += roaring_hamming(lastword & mask);
+return sum - 1;
+} else {
+return -1;
+}
+}
+
+/* Returns the index of the first value equal or larger than x, or -1 */
+int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x) {
+uint32_t x32 = x;
+uint32_t k = x32 / 64;
+uint64_t word = container->words[k];
+const int diff = x32 - k * 64; // in [0,64)
+word = (word >> diff) << diff; // a mask is faster, but we don't care
+while(word == 0) {
+k++;
+if(k == 
BITSET_CONTAINER_SIZE_IN_WORDS) return -1; +word = container->words[k]; +} +return k * 64 + roaring_trailing_zeroes(word); +} + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/containers/bitset.c */ +/* begin file src/containers/containers.c */ + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +extern inline const container_t *container_unwrap_shared( +const container_t *candidate_shared_container, uint8_t *type); + +extern inline container_t *container_mutable_unwrap_shared( +container_t *candidate_shared_container, uint8_t *type); + +extern inline int container_get_cardinality( +const container_t *c, uint8_t typecode); + +extern inline container_t *container_iand( +container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2, +uint8_t *result_type); + +extern inline container_t *container_ior( +container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2, +uint8_t *result_type); + +extern inline container_t *container_ixor( +container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2, +uint8_t *result_type); + +extern inline container_t *container_iandnot( +container_t *c1, uint8_t type1, +const container_t *c2, uint8_t type2, +uint8_t *result_type); + +void container_free(container_t *c, uint8_t type) { +switch (type) { +case BITSET_CONTAINER_TYPE: +bitset_container_free(CAST_bitset(c)); +break; +case ARRAY_CONTAINER_TYPE: +array_container_free(CAST_array(c)); +break; +case RUN_CONTAINER_TYPE: +run_container_free(CAST_run(c)); +break; +case SHARED_CONTAINER_TYPE: +shared_container_free(CAST_shared(c)); +break; +default: +assert(false); +roaring_unreachable; +} +} + +void container_printf(const container_t *c, uint8_t type) { +c = container_unwrap_shared(c, &type); +switch (type) { +case BITSET_CONTAINER_TYPE: +bitset_container_printf(const_CAST_bitset(c)); +return; +case ARRAY_CONTAINER_TYPE: 
+array_container_printf(const_CAST_array(c)); +return; +case RUN_CONTAINER_TYPE: +run_container_printf(const_CAST_run(c)); +return; +default: +roaring_unreachable; +} +} + +void container_printf_as_uint32_array( +const container_t *c, uint8_t typecode, +uint32_t base +){ +c = container_unwrap_shared(c, &typecode); +switch (typecode) { +case BITSET_CONTAINER_TYPE: +bitset_container_printf_as_uint32_array( +const_CAST_bitset(c), base); +return; +case ARRAY_CONTAINER_TYPE: +array_container_printf_as_uint32_array( +const_CAST_array(c), base); +return; +case RUN_CONTAINER_TYPE: +run_container_printf_as_uint32_array( +const_CAST_run(c), base); +return; +default: +roaring_unreachable; +} +} + +bool container_internal_validate(const container_t *container, +uint8_t typecode, const char **reason) { +if (container == NULL) { +*reason = "container is NULL"; +return false; +} +// Not using container_unwrap_shared because it asserts if shared containers are nested +if (typecode == SHARED_CONTAINER_TYPE) { +const shared_container_t *shared_container = const_CAST_shared(container); +if (croaring_refcount_get(&shared_container->counter) == 0) { +*reason = "shared container has zero refcount"; +return false; +} +if (shared_container->typecode == SHARED_CONTAINER_TYPE) { +*reason = "shared container is nested"; +return false; +} +if (shared_container->container == NULL) { +*reason = "shared container has NULL container"; +return false; +} +container = shared_container->container; +typecode = shared_container->typecode; +} +switch (typecode) { +case BITSET_CONTAINER_TYPE: +return bitset_container_validate(const_CAST_bitset(container), reason); +case ARRAY_CONTAINER_TYPE: +return array_container_validate(const_CAST_array(container), reason); +case RUN_CONTAINER_TYPE: +return run_container_validate(const_CAST_run(container), reason); +default: +*reason = "invalid typecode"; +return false; +} +} + +extern inline bool container_nonzero_cardinality( +const container_t *c, uint8_t 
typecode);  /* tail of a declaration that begins before this excerpt */

/* Out-of-line instantiations of functions declared `inline` in the headers
 * (C99/C11 extern-inline idiom): exactly one translation unit must provide
 * the external definitions. */
extern inline int container_to_uint32_array(
    uint32_t *output,
    const container_t *c, uint8_t typecode,
    uint32_t base);

extern inline container_t *container_add(
    container_t *c,
    uint16_t val,
    uint8_t typecode,  // !!! 2nd arg?
    uint8_t *new_typecode);

extern inline bool container_contains(
    const container_t *c,
    uint16_t val,
    uint8_t typecode);  // !!! 2nd arg?

extern inline container_t *container_and(
    const container_t *c1, uint8_t type1,
    const container_t *c2, uint8_t type2,
    uint8_t *result_type);

extern inline container_t *container_or(
    const container_t *c1, uint8_t type1,
    const container_t *c2, uint8_t type2,
    uint8_t *result_type);

extern inline container_t *container_xor(
    const container_t *c1, uint8_t type1,
    const container_t *c2, uint8_t type2,
    uint8_t *result_type);

/* Returns a copy-on-write reference to `c` when COW is enabled, otherwise a
 * deep copy.  On the COW path, *typecode becomes SHARED_CONTAINER_TYPE and
 * the shared wrapper's reference count accounts for both the caller and the
 * original owner.  Returns NULL on allocation failure (COW path only). */
container_t *get_copy_of_container(
    container_t *c, uint8_t *typecode,
    bool copy_on_write
){
    if (copy_on_write) {
        shared_container_t *shared_container;
        if (*typecode == SHARED_CONTAINER_TYPE) {
            // already shared: just take another reference
            shared_container = CAST_shared(c);
            croaring_refcount_inc(&shared_container->counter);
            return shared_container;
        }
        assert(*typecode != SHARED_CONTAINER_TYPE);

        if ((shared_container = (shared_container_t *)roaring_malloc(
                 sizeof(shared_container_t))) == NULL) {
            return NULL;
        }

        shared_container->container = c;
        shared_container->typecode = *typecode;
        // At this point, we are creating new shared container
        // so there should be no other references, and setting
        // the counter to 2 - even non-atomically - is safe as
        // long as the value is set before the return statement.
        shared_container->counter = 2;
        *typecode = SHARED_CONTAINER_TYPE;

        return shared_container;
    }  // copy_on_write
    // otherwise, no copy on write...
    const container_t *actual_container = container_unwrap_shared(c, typecode);
    assert(*typecode != SHARED_CONTAINER_TYPE);
    return container_clone(actual_container, *typecode);
}

/**
 * Copies a container, requires a typecode. This allocates new memory, caller
 * is responsible for deallocation.
 */
container_t *container_clone(const container_t *c, uint8_t typecode) {
    // We do not want to allow cloning of shared containers.
    // c = container_unwrap_shared(c, &typecode);
    switch (typecode) {
        case BITSET_CONTAINER_TYPE:
            return bitset_container_clone(const_CAST_bitset(c));
        case ARRAY_CONTAINER_TYPE:
            return array_container_clone(const_CAST_array(c));
        case RUN_CONTAINER_TYPE:
            return run_container_clone(const_CAST_run(c));
        case SHARED_CONTAINER_TYPE:
            // Shared containers are not cloneable.
            // Are you mixing COW and non-COW bitmaps?
            return NULL;
        default:
            assert(false);
            roaring_unreachable;
            return NULL;
    }
}

/* Releases the caller's reference on shared wrapper `sc` and returns the
 * inner container: the wrapped pointer itself when this was the last
 * reference (the wrapper is freed), otherwise a private clone.  *typecode
 * receives the inner container's type. */
container_t *shared_container_extract_copy(
    shared_container_t *sc, uint8_t *typecode
){
    assert(sc->typecode != SHARED_CONTAINER_TYPE);
    *typecode = sc->typecode;
    container_t *answer;
    if (croaring_refcount_dec(&sc->counter)) {
        // last reference: steal the payload and free the wrapper
        answer = sc->container;
        sc->container = NULL;  // paranoid
        roaring_free(sc);
    } else {
        // still referenced elsewhere: hand back a private clone
        answer = container_clone(sc->container, *typecode);
    }
    assert(*typecode != SHARED_CONTAINER_TYPE);
    return answer;
}

/* Drops one reference; on the last one, frees both the wrapped container and
 * the wrapper itself. */
void shared_container_free(shared_container_t *container) {
    if (croaring_refcount_dec(&container->counter)) {
        assert(container->typecode != SHARED_CONTAINER_TYPE);
        container_free(container->container, container->typecode);
        container->container = NULL;  // paranoid
        roaring_free(container);
    }
}

extern inline container_t *container_not(
    const container_t *c1, uint8_t type1,
    uint8_t *result_type);

extern inline container_t *container_not_range(
    const container_t *c1, uint8_t type1,
    uint32_t range_start, uint32_t range_end,
    uint8_t *result_type);

extern inline container_t *container_inot(
    container_t *c1, uint8_t type1,
    uint8_t *result_type);

extern inline container_t *container_inot_range(
    container_t *c1, uint8_t type1,
    uint32_t range_start, uint32_t range_end,
    uint8_t *result_type);

extern inline container_t *container_range_of_ones(
    uint32_t range_start, uint32_t range_end,
    uint8_t *result_type);

// where are the corresponding things for union and intersection??
extern inline container_t *container_lazy_xor(
    const container_t *c1, uint8_t type1,
    const container_t *c2, uint8_t type2,
    uint8_t *result_type);

extern inline container_t *container_lazy_ixor(
    container_t *c1, uint8_t type1,
    const container_t *c2, uint8_t type2,
    uint8_t *result_type);

extern inline container_t *container_andnot(
    const container_t *c1, uint8_t type1,
    const container_t *c2, uint8_t type2,
    uint8_t *result_type);

#ifdef __cplusplus
} } }  // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/containers.c */
/* begin file src/containers/convert.c */
// NOTE(review): the header name after this #include was lost in extraction
// (angle-bracket text stripped) — restore from the upstream amalgamation.
#include


#if CROARING_IS_X64
#ifndef CROARING_COMPILER_SUPPORTS_AVX512
#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
#endif // CROARING_COMPILER_SUPPORTS_AVX512
#endif

#ifdef __cplusplus
extern "C" { namespace roaring { namespace internal {
#endif

// file contains grubby stuff that must know impl. details of all container
// types.
/* Builds a bitset container holding exactly the values of array container
 * `ac`.  Caller owns (and must free) the result. */
bitset_container_t *bitset_container_from_array(const array_container_t *ac) {
    bitset_container_t *ans = bitset_container_create();
    int limit = array_container_cardinality(ac);
    for (int i = 0; i < limit; ++i) bitset_container_set(ans, ac->array[i]);
    return ans;
}

/* Builds a bitset container from run container `arr`; the cardinality is
 * computed up front so the bitset's counter can be set directly instead of
 * being recomputed after the fill. */
bitset_container_t *bitset_container_from_run(const run_container_t *arr) {
    int card = run_container_cardinality(arr);
    bitset_container_t *answer = bitset_container_create();
    for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
        rle16_t vl = arr->runs[rlepos];
        bitset_set_lenrange(answer->words, vl.value, vl.length);
    }
    answer->cardinality = card;
    return answer;
}

/* Expands run container `arr` into a sorted array container.  Run arithmetic
 * is done in `int` so `run_end` (inclusive) cannot wrap at 0xFFFF. */
array_container_t *array_container_from_run(const run_container_t *arr) {
    array_container_t *answer =
        array_container_create_given_capacity(run_container_cardinality(arr));
    answer->cardinality = 0;
    for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
        int run_start = arr->runs[rlepos].value;
        int run_end = run_start + arr->runs[rlepos].length;  // inclusive

        for (int run_value = run_start; run_value <= run_end; ++run_value) {
            answer->array[answer->cardinality++] = (uint16_t)run_value;
        }
    }
    return answer;
}

/* Extracts the set bits of `bits` into a new array container.  Uses the
 * AVX512 extraction kernel when the CPU supports it; otherwise the scalar
 * routine (the SSE variant is deliberately not used — see comment below). */
array_container_t *array_container_from_bitset(const bitset_container_t *bits) {
    array_container_t *result =
        array_container_create_given_capacity(bits->cardinality);
    result->cardinality = bits->cardinality;
#if CROARING_IS_X64
#if CROARING_COMPILER_SUPPORTS_AVX512
    if( croaring_hardware_support() & ROARING_SUPPORTS_AVX512 ) {
        bitset_extract_setbits_avx512_uint16(bits->words, BITSET_CONTAINER_SIZE_IN_WORDS,
                                             result->array, bits->cardinality , 0);
    } else
#endif
    {
        // sse version ends up being slower here
        // (bitset_extract_setbits_sse_uint16)
        // because of the sparsity of the data
        bitset_extract_setbits_uint16(bits->words, BITSET_CONTAINER_SIZE_IN_WORDS,
                                      result->array, 0);
    }
#else
    // If the system is not x64, then we have no accelerated function.
    bitset_extract_setbits_uint16(bits->words, BITSET_CONTAINER_SIZE_IN_WORDS,
                                  result->array, 0);
#endif


    return result;
}

/* assumes that container has adequate space. Run from [s,e] (inclusive) */
static void add_run(run_container_t *rc, int s, int e) {
    rc->runs[rc->n_runs].value = s;
    rc->runs[rc->n_runs].length = e - s;
    rc->n_runs++;
}

/* Compresses array container `c` into a run container; the capacity is sized
 * exactly by a first pass that counts the runs. */
run_container_t *run_container_from_array(const array_container_t *c) {
    int32_t n_runs = array_container_number_of_runs(c);
    run_container_t *answer = run_container_create_given_capacity(n_runs);
    int prev = -2;       // -2 guarantees the first value opens a new run
    int run_start = -1;  // -1 means "no run open yet"
    int32_t card = c->cardinality;
    if (card == 0) return answer;
    for (int i = 0; i < card; ++i) {
        const uint16_t cur_val = c->array[i];
        if (cur_val != prev + 1) {
            // new run starts; flush old one, if any
            if (run_start != -1) add_run(answer, run_start, prev);
            run_start = cur_val;
        }
        prev = c->array[i];
    }
    // now prev is the last seen value
    add_run(answer, run_start, prev);
    // assert(run_container_cardinality(answer) == c->cardinality);
    return answer;
}

/**
 * Convert the runcontainer to either a Bitmap or an Array Container, depending
 * on the cardinality. Frees the container.
 * Allocates and returns new container, which caller is responsible for freeing.
 * It does not free the run container
 */
container_t *convert_to_bitset_or_array_container(
    run_container_t *rc, int32_t card,
    uint8_t *resulttype
){
    if (card <= DEFAULT_MAX_SIZE) {
        array_container_t *answer = array_container_create_given_capacity(card);
        answer->cardinality = 0;
        for (int rlepos = 0; rlepos < rc->n_runs; ++rlepos) {
            uint16_t run_start = rc->runs[rlepos].value;
            uint16_t run_end = run_start + rc->runs[rlepos].length;
            // write [run_start, run_end); the inclusive endpoint is appended
            // after the loop, so the 16-bit run_value can never wrap at 0xFFFF
            for (uint16_t run_value = run_start; run_value < run_end;
                 ++run_value) {
                answer->array[answer->cardinality++] = run_value;
            }
            answer->array[answer->cardinality++] = run_end;
        }
        assert(card == answer->cardinality);
        *resulttype = ARRAY_CONTAINER_TYPE;
        //run_container_free(r);
        return answer;
    }
    bitset_container_t *answer = bitset_container_create();
    for (int rlepos = 0; rlepos < rc->n_runs; ++rlepos) {
        uint16_t run_start = rc->runs[rlepos].value;
        bitset_set_lenrange(answer->words, run_start, rc->runs[rlepos].length);
    }
    answer->cardinality = card;
    *resulttype = BITSET_CONTAINER_TYPE;
    //run_container_free(r);
    return answer;
}

/* Converts a run container to either an array or a bitset, IF it saves space.
 */
/* If a conversion occurs, the caller is responsible to free the original
 * container and
 * he becomes responsible to free the new one. */
container_t *convert_run_to_efficient_container(
    run_container_t *c,
    uint8_t *typecode_after
){
    // compare the serialized footprint of the three representations and
    // keep the run container only when it is no larger than the best other
    int32_t size_as_run_container =
        run_container_serialized_size_in_bytes(c->n_runs);

    int32_t size_as_bitset_container =
        bitset_container_serialized_size_in_bytes();
    int32_t card = run_container_cardinality(c);
    int32_t size_as_array_container =
        array_container_serialized_size_in_bytes(card);

    int32_t min_size_non_run =
        size_as_bitset_container < size_as_array_container
            ? size_as_bitset_container
            : size_as_array_container;
    if (size_as_run_container <= min_size_non_run) {  // no conversion
        *typecode_after = RUN_CONTAINER_TYPE;
        return c;
    }
    if (card <= DEFAULT_MAX_SIZE) {
        // to array
        array_container_t *answer = array_container_create_given_capacity(card);
        answer->cardinality = 0;
        for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) {
            int run_start = c->runs[rlepos].value;
            int run_end = run_start + c->runs[rlepos].length;

            for (int run_value = run_start; run_value <= run_end; ++run_value) {
                answer->array[answer->cardinality++] = (uint16_t)run_value;
            }
        }
        *typecode_after = ARRAY_CONTAINER_TYPE;
        return answer;
    }

    // else to bitset
    bitset_container_t *answer = bitset_container_create();

    for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) {
        int start = c->runs[rlepos].value;
        int end = start + c->runs[rlepos].length;
        bitset_set_range(answer->words, start, end + 1);
    }
    answer->cardinality = card;
    *typecode_after = BITSET_CONTAINER_TYPE;
    return answer;
}

// like convert_run_to_efficient_container but frees the old result if needed
container_t *convert_run_to_efficient_container_and_free(
    run_container_t *c,
    uint8_t *typecode_after
){
    container_t *answer = convert_run_to_efficient_container(c, typecode_after);
    if (answer != c) run_container_free(c);
    return answer;
}

/* once converted, the original container is disposed here, rather than
   in roaring_array
*/

// TODO: split into run- array- and bitset- subfunctions for sanity;
// a few function calls won't really matter.

container_t *convert_run_optimize(
    container_t *c, uint8_t typecode_original,
    uint8_t *typecode_after
){
    if (typecode_original == RUN_CONTAINER_TYPE) {
        container_t *newc = convert_run_to_efficient_container(
            CAST_run(c), typecode_after);
        if (newc != c) {
            container_free(c, typecode_original);
        }
        return newc;
    } else if (typecode_original == ARRAY_CONTAINER_TYPE) {
        // it might need to be converted to a run container.
        array_container_t *c_qua_array = CAST_array(c);
        int32_t n_runs = array_container_number_of_runs(c_qua_array);
        int32_t size_as_run_container =
            run_container_serialized_size_in_bytes(n_runs);
        int32_t card = array_container_cardinality(c_qua_array);
        int32_t size_as_array_container =
            array_container_serialized_size_in_bytes(card);

        if (size_as_run_container >= size_as_array_container) {
            *typecode_after = ARRAY_CONTAINER_TYPE;
            return c;
        }
        // else convert array to run container
        run_container_t *answer = run_container_create_given_capacity(n_runs);
        int prev = -2;       // forces the first element to open a run
        int run_start = -1;  // -1 means "no run open yet"

        assert(card > 0);
        for (int i = 0; i < card; ++i) {
            uint16_t cur_val = c_qua_array->array[i];
            if (cur_val != prev + 1) {
                // new run starts; flush old one, if any
                if (run_start != -1) add_run(answer, run_start, prev);
                run_start = cur_val;
            }
            prev = c_qua_array->array[i];
        }
        assert(run_start >= 0);
        // now prev is the last seen value
        add_run(answer, run_start, prev);
        *typecode_after = RUN_CONTAINER_TYPE;
        array_container_free(c_qua_array);
        return answer;
    } else if (typecode_original ==
               BITSET_CONTAINER_TYPE) {  // run conversions on bitset
        // does bitset need conversion to run?
        bitset_container_t *c_qua_bitset = CAST_bitset(c);
        int32_t n_runs = bitset_container_number_of_runs(c_qua_bitset);
        int32_t size_as_run_container =
            run_container_serialized_size_in_bytes(n_runs);
        int32_t size_as_bitset_container =
            bitset_container_serialized_size_in_bytes();

        if (size_as_bitset_container <= size_as_run_container) {
            // no conversion needed.
            *typecode_after = BITSET_CONTAINER_TYPE;
            return c;
        }
        // bitset to runcontainer (ported from Java RunContainer(
        // BitmapContainer bc, int nbrRuns))
        assert(n_runs > 0);  // no empty bitmaps
        run_container_t *answer = run_container_create_given_capacity(n_runs);

        int long_ctr = 0;
        uint64_t cur_word = c_qua_bitset->words[0];
        while (true) {
            // skip zero words to find the next run's starting word
            while (cur_word == UINT64_C(0) &&
                   long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
                cur_word = c_qua_bitset->words[++long_ctr];

            if (cur_word == UINT64_C(0)) {
                bitset_container_free(c_qua_bitset);
                *typecode_after = RUN_CONTAINER_TYPE;
                return answer;
            }

            int local_run_start = roaring_trailing_zeroes(cur_word);
            int run_start = local_run_start + 64 * long_ctr;
            // smear ones below the lowest set bit so that the trailing
            // zeroes of the complement locate the end of the run
            uint64_t cur_word_with_1s = cur_word | (cur_word - 1);

            int run_end = 0;
            while (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF) &&
                   long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
                cur_word_with_1s = c_qua_bitset->words[++long_ctr];

            if (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF)) {
                run_end = 64 + long_ctr * 64;  // exclusive, I guess
                add_run(answer, run_start, run_end - 1);
                bitset_container_free(c_qua_bitset);
                *typecode_after = RUN_CONTAINER_TYPE;
                return answer;
            }
            int local_run_end = roaring_trailing_zeroes(~cur_word_with_1s);
            run_end = local_run_end + long_ctr * 64;
            add_run(answer, run_start, run_end - 1);
            // clear the run just emitted, keep the remainder of the word
            cur_word = cur_word_with_1s & (cur_word_with_1s + 1);
        }
        return answer;  // unreachable: the loop above always returns
    } else {
        assert(false);
        roaring_unreachable;
        return NULL;
    }
}

/* Unions run container `run` with the closed range [min,max], returning a
 * new container (bitset, demoted to array when small enough); the chosen
 * type is reported through *typecode_after. */
container_t *container_from_run_range(
    const run_container_t *run,
    uint32_t min, uint32_t max, uint8_t *typecode_after
){
    // We expect most of the time to end up with a bitset container
    bitset_container_t *bitset = bitset_container_create();
    *typecode_after = BITSET_CONTAINER_TYPE;
    int32_t union_cardinality = 0;
    for (int32_t i = 0; i < run->n_runs; ++i) {
        uint32_t rle_min = run->runs[i].value;
        uint32_t rle_max = rle_min + run->runs[i].length;
        bitset_set_lenrange(bitset->words, rle_min,
rle_max - rle_min); +union_cardinality += run->runs[i].length + 1; +} +union_cardinality += max - min + 1; +union_cardinality -= bitset_lenrange_cardinality(bitset->words, min, max-min); +bitset_set_lenrange(bitset->words, min, max - min); +bitset->cardinality = union_cardinality; +if(bitset->cardinality <= DEFAULT_MAX_SIZE) { +// we need to convert to an array container +array_container_t * array = array_container_from_bitset(bitset); +*typecode_after = ARRAY_CONTAINER_TYPE; +bitset_container_free(bitset); +return array; +} +return bitset; +} + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { #endif -#define LOOP_SIZE \ - BITSET_CONTAINER_SIZE_IN_WORDS / \ - ((WORDS_IN_AVX2_REG)*BITSET_CONTAINER_FN_REPEAT) +/* end file src/containers/convert.c */ +/* begin file src/containers/mixed_andnot.c */ +/* + * mixed_andnot.c. More methods since operation is not symmetric, + * except no "wide" andnot , so no lazy options motivated. + */ + +#include +#include + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst, a valid array container that could be the same as dst.*/ +void array_bitset_container_andnot(const array_container_t *src_1, +const bitset_container_t *src_2, +array_container_t *dst) { +// follows Java implementation as of June 2016 +if (dst->capacity < src_1->cardinality) { +array_container_grow(dst, src_1->cardinality, false); +} +int32_t newcard = 0; +const int32_t origcard = src_1->cardinality; +for (int i = 0; i < origcard; ++i) { +uint16_t key = src_1->array[i]; +dst->array[newcard] = key; +newcard += 1 - bitset_container_contains(src_2, key); +} +dst->cardinality = newcard; +} + +/* Compute the andnot of src_1 and src_2 and write the result to + * src_1 */ + +void array_bitset_container_iandnot(array_container_t *src_1, +const bitset_container_t *src_2) { +array_bitset_container_andnot(src_1, src_2, src_1); +} + +/* 
Compute the andnot of src_1 and src_2 and write the result to
 * dst, which does not initially have a valid container.
 * Return true for a bitset result; false for array
 */

bool bitset_array_container_andnot(
    const bitset_container_t *src_1, const array_container_t *src_2,
    container_t **dst
){
    // Java did this directly, but we have option of asm or avx
    bitset_container_t *result = bitset_container_create();
    bitset_container_copy(src_1, result);
    result->cardinality =
        (int32_t)bitset_clear_list(result->words, (uint64_t)result->cardinality,
                                   src_2->array, (uint64_t)src_2->cardinality);

    // do required type conversions.
    if (result->cardinality <= DEFAULT_MAX_SIZE) {
        *dst = array_container_from_bitset(result);
        bitset_container_free(result);
        return false;
    }
    *dst = result;
    return true;
}

/* Compute the andnot of src_1 and src_2 and write the result to
 * dst (which has no container initially). It will modify src_1
 * to be dst if the result is a bitset. Otherwise, it will
 * free src_1 and dst will be a new array container. In both
 * cases, the caller is responsible for deallocating dst.
 * Returns true iff dst is a bitset */

bool bitset_array_container_iandnot(
    bitset_container_t *src_1, const array_container_t *src_2,
    container_t **dst
){
    *dst = src_1;
    src_1->cardinality =
        (int32_t)bitset_clear_list(src_1->words, (uint64_t)src_1->cardinality,
                                   src_2->array, (uint64_t)src_2->cardinality);

    if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
        *dst = array_container_from_bitset(src_1);
        bitset_container_free(src_1);
        return false;  // not bitset
    } else
        return true;
}

/* Compute the andnot of src_1 and src_2 and write the result to
 * dst. Result may be either a bitset or an array container
 * (returns "result is bitset"). dst does not initially have
 * any container, but becomes either a bitset container (return
 * result true) or an array container.
 */

bool run_bitset_container_andnot(
    const run_container_t *src_1, const bitset_container_t *src_2,
    container_t **dst
){
    // follows the Java implementation as of June 2016
    int card = run_container_cardinality(src_1);
    if (card <= DEFAULT_MAX_SIZE) {
        // must be an array
        array_container_t *answer = array_container_create_given_capacity(card);
        answer->cardinality = 0;
        for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
            rle16_t rle = src_1->runs[rlepos];
            for (int run_value = rle.value; run_value <= rle.value + rle.length;
                 ++run_value) {
                if (!bitset_container_get(src_2, (uint16_t)run_value)) {
                    answer->array[answer->cardinality++] = (uint16_t)run_value;
                }
            }
        }
        *dst = answer;
        return false;
    } else {  // we guess it will be a bitset, though have to check guess when
              // done
        bitset_container_t *answer = bitset_container_clone(src_2);

        uint32_t last_pos = 0;
        for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
            rle16_t rle = src_1->runs[rlepos];

            uint32_t start = rle.value;
            uint32_t end = start + rle.length + 1;
            // zero the gap before this run, then flip the run itself:
            // within the run, bits of src_2 become src_1 AND NOT src_2
            bitset_reset_range(answer->words, last_pos, start);
            bitset_flip_range(answer->words, start, end);
            last_pos = end;
        }
        bitset_reset_range(answer->words, last_pos, (uint32_t)(1 << 16));

        answer->cardinality = bitset_container_compute_cardinality(answer);

        if (answer->cardinality <= DEFAULT_MAX_SIZE) {
            *dst = array_container_from_bitset(answer);
            bitset_container_free(answer);
            return false;  // not bitset
        }
        *dst = answer;
        return true;  // bitset
    }
}

/* Compute the andnot of src_1 and src_2 and write the result to
 * dst. Result may be either a bitset or an array container
 * (returns "result is bitset"). dst does not initially have
 * any container, but becomes either a bitset container (return
 * result true) or an array container.
 */

bool run_bitset_container_iandnot(
    run_container_t *src_1, const bitset_container_t *src_2,
    container_t **dst
){
    // dummy implementation: delegate, then dispose of src_1
    bool ans = run_bitset_container_andnot(src_1, src_2, dst);
    run_container_free(src_1);
    return ans;
}

/* Compute the andnot of src_1 and src_2 and write the result to
 * dst. Result may be either a bitset or an array container
 * (returns "result is bitset"). dst does not initially have
 * any container, but becomes either a bitset container (return
 * result true) or an array container.
 */

bool bitset_run_container_andnot(
    const bitset_container_t *src_1, const run_container_t *src_2,
    container_t **dst
){
    // follows Java implementation
    bitset_container_t *result = bitset_container_create();

    bitset_container_copy(src_1, result);
    for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
        rle16_t rle = src_2->runs[rlepos];
        bitset_reset_range(result->words, rle.value,
                           rle.value + rle.length + UINT32_C(1));
    }
    result->cardinality = bitset_container_compute_cardinality(result);

    if (result->cardinality <= DEFAULT_MAX_SIZE) {
        *dst = array_container_from_bitset(result);
        bitset_container_free(result);
        return false;  // not bitset
    }
    *dst = result;
    return true;  // bitset
}

/* Compute the andnot of src_1 and src_2 and write the result to
 * dst (which has no container initially). It will modify src_1
 * to be dst if the result is a bitset. Otherwise, it will
 * free src_1 and dst will be a new array container. In both
 * cases, the caller is responsible for deallocating dst.
 * Returns true iff dst is a bitset */

bool bitset_run_container_iandnot(
    bitset_container_t *src_1, const run_container_t *src_2,
    container_t **dst
){
    *dst = src_1;

    for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
        rle16_t rle = src_2->runs[rlepos];
        bitset_reset_range(src_1->words, rle.value,
                           rle.value + rle.length + UINT32_C(1));
    }
    src_1->cardinality = bitset_container_compute_cardinality(src_1);

    if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
        *dst = array_container_from_bitset(src_1);
        bitset_container_free(src_1);
        return false;  // not bitset
    } else
        return true;
}

/* helper. a_out must be a valid array container with adequate capacity.
 * Returns the cardinality of the output container. Partly Based on Java
 * implementation Util.unsignedDifference.
 *
 * TODO: Util.unsignedDifference does not use advanceUntil. Is it cheaper
 * to avoid advanceUntil?
 */

static int run_array_array_subtract(const run_container_t *rc,
                                    const array_container_t *a_in,
                                    array_container_t *a_out) {
    int out_card = 0;
    int32_t in_array_pos =
        -1;  // since advanceUntil always assumes we start the search AFTER this

    for (int rlepos = 0; rlepos < rc->n_runs; rlepos++) {
        int32_t start = rc->runs[rlepos].value;
        int32_t end = start + rc->runs[rlepos].length + 1;  // exclusive

        in_array_pos = advanceUntil(a_in->array, in_array_pos,
                                    a_in->cardinality, (uint16_t)start);

        if (in_array_pos >= a_in->cardinality) {  // run has no items subtracted
            for (int32_t i = start; i < end; ++i)
                a_out->array[out_card++] = (uint16_t)i;
        } else {
            uint16_t next_nonincluded = a_in->array[in_array_pos];
            if (next_nonincluded >= end) {
                // another case when run goes unaltered
                for (int32_t i = start; i < end; ++i)
                    a_out->array[out_card++] = (uint16_t)i;
                in_array_pos--;  // ensure we see this item again if necessary
            } else {
                for (int32_t i = start; i < end; ++i)
                    if (i != next_nonincluded)
                        a_out->array[out_card++] = (uint16_t)i;
                    else  // 0 should ensure we don't match
                        next_nonincluded =
                            (in_array_pos + 1 >= a_in->cardinality)
                                ? 0
                                : a_in->array[++in_array_pos];
                in_array_pos--;  // see again
            }
        }
    }
    return out_card;
}

/* dst does not indicate a valid container initially. Eventually it
 * can become any type of container.
 */

int run_array_container_andnot(
    const run_container_t *src_1, const array_container_t *src_2,
    container_t **dst
){
    // follows the Java impl as of June 2016

    int card = run_container_cardinality(src_1);
    const int arbitrary_threshold = 32;

    if (card <= arbitrary_threshold) {
        if (src_2->cardinality == 0) {
            *dst = run_container_clone(src_1);
            return RUN_CONTAINER_TYPE;
        }
        // Java's "lazyandNot.toEfficientContainer" thing
        run_container_t *answer = run_container_create_given_capacity(
            card + array_container_cardinality(src_2));

        int rlepos = 0;
        int xrlepos = 0;  // "x" is src_2
        rle16_t rle = src_1->runs[rlepos];
        int32_t start = rle.value;
        int32_t end = start + rle.length + 1;  // exclusive end of current run
        int32_t xstart = src_2->array[xrlepos];

        while ((rlepos < src_1->n_runs) && (xrlepos < src_2->cardinality)) {
            if (end <= xstart) {
                // output the first run
                answer->runs[answer->n_runs++] =
                    MAKE_RLE16(start, end - start - 1);
                rlepos++;
                if (rlepos < src_1->n_runs) {
                    start = src_1->runs[rlepos].value;
                    end = start + src_1->runs[rlepos].length + 1;
                }
            } else if (xstart + 1 <= start) {
                // exit the second run
                xrlepos++;
                if (xrlepos < src_2->cardinality) {
                    xstart = src_2->array[xrlepos];
                }
            } else {
                // the subtracted value xstart falls inside the current run:
                // emit the prefix before it, then resume after it
                if (start < xstart) {
                    answer->runs[answer->n_runs++] =
                        MAKE_RLE16(start, xstart - start - 1);
                }
                if (xstart + 1 < end) {
                    start = xstart + 1;
                } else {
                    rlepos++;
                    if (rlepos < src_1->n_runs) {
                        start = src_1->runs[rlepos].value;
                        end = start + src_1->runs[rlepos].length + 1;
                    }
                }
            }
        }
        if (rlepos < src_1->n_runs) {
            // flush the (possibly trimmed) current run, then bulk-copy the rest
            answer->runs[answer->n_runs++] = MAKE_RLE16(start, end - start - 1);
            rlepos++;
            if (rlepos < src_1->n_runs) {
                memcpy(answer->runs + answer->n_runs, src_1->runs + rlepos,
                       (src_1->n_runs - rlepos) * sizeof(rle16_t));
                answer->n_runs += (src_1->n_runs - rlepos);
            }
        }
        uint8_t return_type;
        *dst = convert_run_to_efficient_container(answer, &return_type);
        if (answer != *dst) run_container_free(answer);
        return return_type;
    }
    // else it's a bitmap or array

    if (card <= DEFAULT_MAX_SIZE) {
        array_container_t *ac = array_container_create_given_capacity(card);
        // nb Java code used a generic iterator-based merge to compute
        // difference
        ac->cardinality = run_array_array_subtract(src_1, src_2, ac);
        *dst = ac;
        return ARRAY_CONTAINER_TYPE;
    }
    bitset_container_t *ans = bitset_container_from_run(src_1);
    bool result_is_bitset = bitset_array_container_iandnot(ans, src_2, dst);
    return (result_is_bitset ? BITSET_CONTAINER_TYPE
                             : ARRAY_CONTAINER_TYPE);
}

/* Compute the andnot of src_1 and src_2 and write the result to
 * dst (which has no container initially). It will modify src_1
 * to be dst if the result is a bitset. Otherwise, it will
 * free src_1 and dst will be a new array container. In both
 * cases, the caller is responsible for deallocating dst.
 * Returns true iff dst is a bitset */

int run_array_container_iandnot(
    run_container_t *src_1, const array_container_t *src_2,
    container_t **dst
){
    // dummy implementation same as June 2016 Java
    int ans = run_array_container_andnot(src_1, src_2, dst);
    run_container_free(src_1);
    return ans;
}

/* dst must be a valid array container, allowed to be src_1 */

void array_run_container_andnot(const array_container_t *src_1,
                                const run_container_t *src_2,
                                array_container_t *dst) {
    // basically following Java impl as of June 2016
    if (src_1->cardinality > dst->capacity) {
        array_container_grow(dst, src_1->cardinality, false);
    }

    if (src_2->n_runs == 0) {
        memmove(dst->array, src_1->array,
                sizeof(uint16_t) * src_1->cardinality);
        dst->cardinality = src_1->cardinality;
        return;
    }
    int32_t run_start = src_2->runs[0].value;
    int32_t run_end = run_start + src_2->runs[0].length;  // inclusive
    int which_run = 0;

    uint16_t val = 0;
    int dest_card = 0;
    for (int i = 0; i < src_1->cardinality; ++i) {
        val = src_1->array[i];
        if (val < run_start)
            dst->array[dest_card++] = val;
        else if (val <= run_end) {
            ;  // omitted item
        } else {
            // val is past the current run: advance through the runs, then
            // reprocess val against the newly selected run
            do {
                if (which_run + 1 < src_2->n_runs) {
                    ++which_run;
                    run_start = src_2->runs[which_run].value;
                    run_end = run_start + src_2->runs[which_run].length;

                } else
                    // sentinel beyond the 16-bit universe: no further runs
                    run_start = run_end = (1 << 16) + 1;
            } while (val > run_end);
            --i;
        }
    }
    dst->cardinality = dest_card;
}

/* dst does not indicate a valid container initially. Eventually it
 * can become any kind of container.
 */

void array_run_container_iandnot(array_container_t *src_1,
                                 const run_container_t *src_2) {
    array_run_container_andnot(src_1, src_2, src_1);
}

/* dst does not indicate a valid container initially. Eventually it
 * can become any kind of container.
 */

int run_run_container_andnot(
    const run_container_t *src_1, const run_container_t *src_2,
    container_t **dst
){
    run_container_t *ans = run_container_create();
    run_container_andnot(src_1, src_2, ans);
    uint8_t typecode_after;
    *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after);
    return typecode_after;
}

/* Compute the andnot of src_1 and src_2 and write the result to
 * dst (which has no container initially). It will modify src_1
 * to be dst if the result is a bitset. Otherwise, it will
 * free src_1 and dst will be a new array container. In both
 * cases, the caller is responsible for deallocating dst.
 * Returns true iff dst is a bitset */

int run_run_container_iandnot(
    run_container_t *src_1, const run_container_t *src_2,
    container_t **dst
){
    // following Java impl as of June 2016 (dummy)
    int ans = run_run_container_andnot(src_1, src_2, dst);
    run_container_free(src_1);
    return ans;
}

/*
 * dst is a valid array container and may be the same as src_1
 */

void array_array_container_andnot(const array_container_t *src_1,
                                  const array_container_t *src_2,
                                  array_container_t *dst) {
    array_container_andnot(src_1, src_2, dst);
}

/* inplace array-array andnot will always be able to reuse the space of
 * src_1 */
void array_array_container_iandnot(array_container_t *src_1,
                                   const array_container_t *src_2) {
    array_container_andnot(src_1, src_2, src_1);
}

/* Compute the andnot of src_1 and src_2 and write the result to
 * dst (which has no container initially). 
Return value is
 * "dst is a bitset"
 */

bool bitset_bitset_container_andnot(
    const bitset_container_t *src_1, const bitset_container_t *src_2,
    container_t **dst
){
    bitset_container_t *ans = bitset_container_create();
    int card = bitset_container_andnot(src_1, src_2, ans);
    if (card <= DEFAULT_MAX_SIZE) {
        *dst = array_container_from_bitset(ans);
        bitset_container_free(ans);
        return false;  // not bitset
    } else {
        *dst = ans;
        return true;
    }
}

/* Compute the andnot of src_1 and src_2 and write the result to
 * dst (which has no container initially). It will modify src_1
 * to be dst if the result is a bitset. Otherwise, it will
 * free src_1 and dst will be a new array container. In both
 * cases, the caller is responsible for deallocating dst.
 * Returns true iff dst is a bitset */

bool bitset_bitset_container_iandnot(
    bitset_container_t *src_1, const bitset_container_t *src_2,
    container_t **dst
){
    int card = bitset_container_andnot(src_1, src_2, src_1);
    if (card <= DEFAULT_MAX_SIZE) {
        *dst = array_container_from_bitset(src_1);
        bitset_container_free(src_1);
        return false;  // not bitset
    } else {
        *dst = src_1;
        return true;
    }
}

#ifdef __cplusplus
} } }  // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/mixed_andnot.c */
/* begin file src/containers/mixed_equal.c */

#ifdef __cplusplus
extern "C" { namespace roaring { namespace internal {
#endif

/* Returns true iff the array container and the bitset container hold exactly
 * the same set of values: walks the set bits in ascending order and matches
 * them against the (sorted) array. */
bool array_container_equal_bitset(const array_container_t* container1,
                                  const bitset_container_t* container2) {
    if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) {
        if (container2->cardinality != container1->cardinality) {
            return false;
        }
    }
    int32_t pos = 0;
    for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
        uint64_t w = container2->words[i];
        while (w != 0) {
            uint64_t t = w & (~w + 1);  // isolate lowest set bit
            uint16_t r = i * 64 + roaring_trailing_zeroes(w);
            if (pos >= container1->cardinality) {
                return false;
            }
            if (container1->array[pos] != r) {
                return false;
            }
            ++pos;
            w ^= t;  // clear that bit and continue
        }
    }
    return (pos == container1->cardinality);
}

/* Returns true iff the run container and the array container are equal.
 * Checking only each run's two endpoints is sufficient: the cardinalities
 * match and the array is strictly increasing, so matching endpoints with
 * le+1 values in between force those values to be consecutive. */
bool run_container_equals_array(const run_container_t* container1,
                                const array_container_t* container2) {
    if (run_container_cardinality(container1) != container2->cardinality)
        return false;
    int32_t pos = 0;
    for (int i = 0; i < container1->n_runs; ++i) {
        const uint32_t run_start = container1->runs[i].value;
        const uint32_t le = container1->runs[i].length;

        if (container2->array[pos] != run_start) {
            return false;
        }

        if (container2->array[pos + le] != run_start + le) {
            return false;
        }

        pos += le + 1;
    }
    return true;
}

/* Returns true iff the run container and the bitset container are equal:
 * compares cardinalities first, then verifies each run is fully present in
 * the bitset (equal cardinality rules out extra bits elsewhere). */
bool run_container_equals_bitset(const run_container_t* container1,
                                 const bitset_container_t* container2) {

    int run_card = run_container_cardinality(container1);
    int bitset_card = (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) ?
                      container2->cardinality :
                      bitset_container_compute_cardinality(container2);
    if (bitset_card != run_card) {
        return false;
    }

    for (int32_t i = 0; i < container1->n_runs; i++) {
        uint32_t begin = container1->runs[i].value;
        if (container1->runs[i].length) {
            uint32_t end = begin + container1->runs[i].length + 1;
            if (!bitset_container_contains_range(container2, begin, end)) {
                return false;
            }
        } else {
            if (!bitset_container_contains(container2, begin)) {
                return false;
            }
        }
    }

    return true;
}

#ifdef __cplusplus
} } }  // extern "C" { namespace roaring { namespace internal {
#endif
/* end file src/containers/mixed_equal.c */
/* begin file src/containers/mixed_intersection.c */
/*
 * mixed_intersection.c
 *
 */


#ifdef __cplusplus
extern "C" { namespace roaring { namespace internal {
#endif

/* Compute the intersection of src_1 and src_2 and write the result to
 * dst. */
void array_bitset_container_intersection(const array_container_t *src_1,
                                         const bitset_container_t *src_2,
                                         array_container_t *dst) {
    if (dst->capacity < src_1->cardinality) {
        array_container_grow(dst, src_1->cardinality, false);
    }
    int32_t newcard = 0;  // dst could be src_1
    const int32_t origcard = src_1->cardinality;
    for (int i = 0; i < origcard; ++i) {
        uint16_t key = src_1->array[i];
        // this branchless approach is much faster...
        dst->array[newcard] = key;
        newcard += bitset_container_contains(src_2, key);
        /**
         * we could do it this way instead...
         * if (bitset_container_contains(src_2, key)) {
         *     dst->array[newcard++] = key;
         * }
         * but if the result is unpredictible, the processor generates
         * many mispredicted branches.
         * Difference can be huge (from 3 cycles when predictible all the way
         * to 16 cycles when unpredictible.
         * See
         * https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/extra/bitset/c/arraybitsetintersection.c
         */
    }
    dst->cardinality = newcard;
}

/* Compute the size of the intersection of src_1 and src_2. 
*/ +int array_bitset_container_intersection_cardinality( +const array_container_t *src_1, const bitset_container_t *src_2) { +int32_t newcard = 0; +const int32_t origcard = src_1->cardinality; +for (int i = 0; i < origcard; ++i) { +uint16_t key = src_1->array[i]; +newcard += bitset_container_contains(src_2, key); +} +return newcard; +} + + +bool array_bitset_container_intersect(const array_container_t *src_1, +const bitset_container_t *src_2) { +const int32_t origcard = src_1->cardinality; +for (int i = 0; i < origcard; ++i) { +uint16_t key = src_1->array[i]; +if(bitset_container_contains(src_2, key)) return true; +} +return false; +} -/* Computes a binary operation (eg union) on bitset1 and bitset2 and write the - result to bitsetout */ -// clang-format off -#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \ -int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint8_t * __restrict__ array_1 = (const uint8_t *)src_1->array; \ - const uint8_t * __restrict__ array_2 = (const uint8_t *)src_2->array; \ - /* not using the blocking optimization for some reason*/ \ - uint8_t *out = (uint8_t*)dst->array; \ - const int innerloop = 8; \ - for (size_t i = 0; \ - i < BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG); \ - i+=innerloop) {\ - __m256i A1, A2, AO; \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)out, AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 32)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 32)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+32), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 64)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 64)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+64), AO); 
\ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 96)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 96)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+96), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 128)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 128)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+128), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 160)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 160)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+160), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 192)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 192)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+192), AO); \ - A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 224)); \ - A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 224)); \ - AO = avx_intrinsic(A2, A1); \ - _mm256_storeu_si256((__m256i *)(out+224), AO); \ - out+=256; \ - array_1 += 256; \ - array_2 += 256; \ - } \ - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ - return dst->cardinality; \ -} \ -/* next, a version that updates cardinality*/ \ -int bitset_container_##opname(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const __m256i * __restrict__ array_1 = (const __m256i *) src_1->array; \ - const __m256i * __restrict__ array_2 = (const __m256i *) src_2->array; \ - __m256i *out = (__m256i *) dst->array; \ - dst->cardinality = (int32_t)avx2_harley_seal_popcount256andstore_##opname(array_2,\ - array_1, out,BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));\ - return dst->cardinality; \ -} \ -/* next, a version that just computes the cardinality*/ \ -int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2) { \ - const __m256i * __restrict__ data1 = (const __m256i *) 
src_1->array; \ - const __m256i * __restrict__ data2 = (const __m256i *) src_2->array; \ - return (int)avx2_harley_seal_popcount256_##opname(data2, \ - data1, BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));\ +/* Compute the intersection of src_1 and src_2 and write the result to + * dst. It is allowed for dst to be equal to src_1. We assume that dst is a + * valid container. */ +void array_run_container_intersection(const array_container_t *src_1, +const run_container_t *src_2, +array_container_t *dst) { +if (run_container_is_full(src_2)) { +if (dst != src_1) array_container_copy(src_1, dst); +return; +} +if (dst->capacity < src_1->cardinality) { +array_container_grow(dst, src_1->cardinality, false); +} +if (src_2->n_runs == 0) { +return; +} +int32_t rlepos = 0; +int32_t arraypos = 0; +rle16_t rle = src_2->runs[rlepos]; +int32_t newcard = 0; +while (arraypos < src_1->cardinality) { +const uint16_t arrayval = src_1->array[arraypos]; +while (rle.value + rle.length < +arrayval) { // this will frequently be false +++rlepos; +if (rlepos == src_2->n_runs) { +dst->cardinality = newcard; +return; // we are done +} +rle = src_2->runs[rlepos]; +} +if (rle.value > arrayval) { +arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, +rle.value); +} else { +dst->array[newcard] = arrayval; +newcard++; +arraypos++; +} +} +dst->cardinality = newcard; } -#elif defined(USENEON) +/* Compute the intersection of src_1 and src_2 and write the result to + * *dst. If the result is true then the result is a bitset_container_t + * otherwise is a array_container_t. 
If *dst == src_2, an in-place processing + * is attempted.*/ +bool run_bitset_container_intersection( +const run_container_t *src_1, const bitset_container_t *src_2, +container_t **dst +){ +if (run_container_is_full(src_1)) { +if (*dst != src_2) *dst = bitset_container_clone(src_2); +return true; +} +int32_t card = run_container_cardinality(src_1); +if (card <= DEFAULT_MAX_SIZE) { +// result can only be an array (assuming that we never make a +// RunContainer) +if (card > src_2->cardinality) { +card = src_2->cardinality; +} +array_container_t *answer = array_container_create_given_capacity(card); +*dst = answer; +if (*dst == NULL) { +return false; +} +for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { +rle16_t rle = src_1->runs[rlepos]; +uint32_t endofrun = (uint32_t)rle.value + rle.length; +for (uint32_t runValue = rle.value; runValue <= endofrun; +++runValue) { +answer->array[answer->cardinality] = (uint16_t)runValue; +answer->cardinality += +bitset_container_contains(src_2, runValue); +} +} +return false; +} +if (*dst == src_2) { // we attempt in-place +bitset_container_t *answer = CAST_bitset(*dst); +uint32_t start = 0; +for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { +const rle16_t rle = src_1->runs[rlepos]; +uint32_t end = rle.value; +bitset_reset_range(src_2->words, start, end); + +start = end + rle.length + 1; +} +bitset_reset_range(src_2->words, start, UINT32_C(1) << 16); +answer->cardinality = bitset_container_compute_cardinality(answer); +if (src_2->cardinality > DEFAULT_MAX_SIZE) { +return true; +} else { +array_container_t *newanswer = array_container_from_bitset(src_2); +if (newanswer == NULL) { +*dst = NULL; +return false; +} +*dst = newanswer; +return false; +} +} else { // no inplace +// we expect the answer to be a bitmap (if we are lucky) +bitset_container_t *answer = bitset_container_clone(src_2); + +*dst = answer; +if (answer == NULL) { +return true; +} +uint32_t start = 0; +for (int32_t rlepos = 0; rlepos < 
src_1->n_runs; ++rlepos) { +const rle16_t rle = src_1->runs[rlepos]; +uint32_t end = rle.value; +bitset_reset_range(answer->words, start, end); +start = end + rle.length + 1; +} +bitset_reset_range(answer->words, start, UINT32_C(1) << 16); +answer->cardinality = bitset_container_compute_cardinality(answer); + +if (answer->cardinality > DEFAULT_MAX_SIZE) { +return true; +} else { +array_container_t *newanswer = array_container_from_bitset(answer); +bitset_container_free(CAST_bitset(*dst)); +if (newanswer == NULL) { +*dst = NULL; +return false; +} +*dst = newanswer; +return false; +} +} +} -#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \ -int bitset_container_##opname(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint64_t * __restrict__ array_1 = src_1->array; \ - const uint64_t * __restrict__ array_2 = src_2->array; \ - uint64_t *out = dst->array; \ - uint16x8_t n0 = vdupq_n_u16(0); \ - uint16x8_t n1 = vdupq_n_u16(0); \ - uint16x8_t n2 = vdupq_n_u16(0); \ - uint16x8_t n3 = vdupq_n_u16(0); \ - for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \ - uint64x2_t c0 = neon_intrinsic(vld1q_u64(&array_1[i + 0]), \ - vld1q_u64(&array_2[i + 0])); \ - n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); \ - vst1q_u64(&out[i + 0], c0); \ - uint64x2_t c1 = neon_intrinsic(vld1q_u64(&array_1[i + 2]), \ - vld1q_u64(&array_2[i + 2])); \ - n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); \ - vst1q_u64(&out[i + 2], c1); \ - uint64x2_t c2 = neon_intrinsic(vld1q_u64(&array_1[i + 4]), \ - vld1q_u64(&array_2[i + 4])); \ - n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); \ - vst1q_u64(&out[i + 4], c2); \ - uint64x2_t c3 = neon_intrinsic(vld1q_u64(&array_1[i + 6]), \ - vld1q_u64(&array_2[i + 6])); \ - n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); \ - vst1q_u64(&out[i + 6], c3); \ - } \ - uint64x2_t n = 
vdupq_n_u64(0); \ - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); \ - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); \ - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); \ - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); \ - dst->cardinality = vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); \ - return dst->cardinality; \ -} \ -int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint64_t * __restrict__ array_1 = src_1->array; \ - const uint64_t * __restrict__ array_2 = src_2->array; \ - uint64_t *out = dst->array; \ - for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \ - vst1q_u64(&out[i + 0], neon_intrinsic(vld1q_u64(&array_1[i + 0]), \ - vld1q_u64(&array_2[i + 0]))); \ - vst1q_u64(&out[i + 2], neon_intrinsic(vld1q_u64(&array_1[i + 2]), \ - vld1q_u64(&array_2[i + 2]))); \ - vst1q_u64(&out[i + 4], neon_intrinsic(vld1q_u64(&array_1[i + 4]), \ - vld1q_u64(&array_2[i + 4]))); \ - vst1q_u64(&out[i + 6], neon_intrinsic(vld1q_u64(&array_1[i + 6]), \ - vld1q_u64(&array_2[i + 6]))); \ - } \ - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ - return dst->cardinality; \ -} \ -int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2) { \ - const uint64_t * __restrict__ array_1 = src_1->array; \ - const uint64_t * __restrict__ array_2 = src_2->array; \ - uint16x8_t n0 = vdupq_n_u16(0); \ - uint16x8_t n1 = vdupq_n_u16(0); \ - uint16x8_t n2 = vdupq_n_u16(0); \ - uint16x8_t n3 = vdupq_n_u16(0); \ - for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \ - uint64x2_t c0 = neon_intrinsic(vld1q_u64(&array_1[i + 0]), \ - vld1q_u64(&array_2[i + 0])); \ - n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); \ - uint64x2_t c1 = neon_intrinsic(vld1q_u64(&array_1[i + 2]), \ - vld1q_u64(&array_2[i + 2])); \ - n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); \ - uint64x2_t c2 = 
neon_intrinsic(vld1q_u64(&array_1[i + 4]), \ - vld1q_u64(&array_2[i + 4])); \ - n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); \ - uint64x2_t c3 = neon_intrinsic(vld1q_u64(&array_1[i + 6]), \ - vld1q_u64(&array_2[i + 6])); \ - n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); \ - } \ - uint64x2_t n = vdupq_n_u64(0); \ - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); \ - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); \ - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); \ - n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); \ - return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); \ +/* Compute the size of the intersection between src_1 and src_2 . */ +int array_run_container_intersection_cardinality(const array_container_t *src_1, +const run_container_t *src_2) { +if (run_container_is_full(src_2)) { +return src_1->cardinality; +} +if (src_2->n_runs == 0) { +return 0; +} +int32_t rlepos = 0; +int32_t arraypos = 0; +rle16_t rle = src_2->runs[rlepos]; +int32_t newcard = 0; +while (arraypos < src_1->cardinality) { +const uint16_t arrayval = src_1->array[arraypos]; +while (rle.value + rle.length < +arrayval) { // this will frequently be false +++rlepos; +if (rlepos == src_2->n_runs) { +return newcard; // we are done +} +rle = src_2->runs[rlepos]; +} +if (rle.value > arrayval) { +arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, +rle.value); +} else { +newcard++; +arraypos++; +} +} +return newcard; } -#else /* not USEAVX */ +/* Compute the intersection between src_1 and src_2 + **/ +int run_bitset_container_intersection_cardinality( +const run_container_t *src_1, const bitset_container_t *src_2) { +if (run_container_is_full(src_1)) { +return bitset_container_cardinality(src_2); +} +int answer = 0; +for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { +rle16_t rle = src_1->runs[rlepos]; +answer += +bitset_lenrange_cardinality(src_2->words, rle.value, rle.length); +} +return answer; +} -#define 
BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \ -int bitset_container_##opname(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint64_t * __restrict__ array_1 = src_1->array; \ - const uint64_t * __restrict__ array_2 = src_2->array; \ - uint64_t *out = dst->array; \ - int32_t sum = 0; \ - for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ - const uint64_t word_1 = (array_1[i])opsymbol(array_2[i]), \ - word_2 = (array_1[i + 1])opsymbol(array_2[i + 1]); \ - out[i] = word_1; \ - out[i + 1] = word_2; \ - sum += hamming(word_1); \ - sum += hamming(word_2); \ - } \ - dst->cardinality = sum; \ - return dst->cardinality; \ -} \ -int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2, \ - bitset_container_t *dst) { \ - const uint64_t * __restrict__ array_1 = src_1->array; \ - const uint64_t * __restrict__ array_2 = src_2->array; \ - uint64_t *out = dst->array; \ - for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \ - out[i] = (array_1[i])opsymbol(array_2[i]); \ - } \ - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ - return dst->cardinality; \ -} \ -int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ - const bitset_container_t *src_2) { \ - const uint64_t * __restrict__ array_1 = src_1->array; \ - const uint64_t * __restrict__ array_2 = src_2->array; \ - int32_t sum = 0; \ - for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ - const uint64_t word_1 = (array_1[i])opsymbol(array_2[i]), \ - word_2 = (array_1[i + 1])opsymbol(array_2[i + 1]); \ - sum += hamming(word_1); \ - sum += hamming(word_2); \ - } \ - return sum; \ + +bool array_run_container_intersect(const array_container_t *src_1, +const run_container_t *src_2) { +if( run_container_is_full(src_2) ) { +return !array_container_empty(src_1); +} +if (src_2->n_runs == 0) { +return false; +} +int32_t rlepos = 0; +int32_t 
arraypos = 0; +rle16_t rle = src_2->runs[rlepos]; +while (arraypos < src_1->cardinality) { +const uint16_t arrayval = src_1->array[arraypos]; +while (rle.value + rle.length < +arrayval) { // this will frequently be false +++rlepos; +if (rlepos == src_2->n_runs) { +return false; // we are done +} +rle = src_2->runs[rlepos]; +} +if (rle.value > arrayval) { +arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, +rle.value); +} else { +return true; +} +} +return false; +} + +/* Compute the intersection between src_1 and src_2 + **/ +bool run_bitset_container_intersect(const run_container_t *src_1, +const bitset_container_t *src_2) { +if( run_container_is_full(src_1) ) { +return !bitset_container_empty(src_2); +} +for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { +rle16_t rle = src_1->runs[rlepos]; +if(!bitset_lenrange_empty(src_2->words, rle.value,rle.length)) return true; } +return false; +} + +/* + * Compute the intersection between src_1 and src_2 and write the result + * to *dst. If the return function is true, the result is a bitset_container_t + * otherwise is a array_container_t. 
+ */ +bool bitset_bitset_container_intersection( +const bitset_container_t *src_1, const bitset_container_t *src_2, +container_t **dst +){ +const int newCardinality = bitset_container_and_justcard(src_1, src_2); +if (newCardinality > DEFAULT_MAX_SIZE) { +*dst = bitset_container_create(); +if (*dst != NULL) { +bitset_container_and_nocard(src_1, src_2, CAST_bitset(*dst)); +CAST_bitset(*dst)->cardinality = newCardinality; +} +return true; // it is a bitset +} +*dst = array_container_create_given_capacity(newCardinality); +if (*dst != NULL) { +CAST_array(*dst)->cardinality = newCardinality; +bitset_extract_intersection_setbits_uint16( +src_1->words, src_2->words, BITSET_CONTAINER_SIZE_IN_WORDS, +CAST_array(*dst)->array, 0); +} +return false; // not a bitset +} + +bool bitset_bitset_container_intersection_inplace( +bitset_container_t *src_1, const bitset_container_t *src_2, +container_t **dst +){ +const int newCardinality = bitset_container_and_justcard(src_1, src_2); +if (newCardinality > DEFAULT_MAX_SIZE) { +*dst = src_1; +bitset_container_and_nocard(src_1, src_2, src_1); +CAST_bitset(*dst)->cardinality = newCardinality; +return true; // it is a bitset +} +*dst = array_container_create_given_capacity(newCardinality); +if (*dst != NULL) { +CAST_array(*dst)->cardinality = newCardinality; +bitset_extract_intersection_setbits_uint16( +src_1->words, src_2->words, BITSET_CONTAINER_SIZE_IN_WORDS, +CAST_array(*dst)->array, 0); +} +return false; // not a bitset +} + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/containers/mixed_intersection.c */ +/* begin file src/containers/mixed_negation.c */ +/* + * mixed_negation.c + * + */ + +#include +#include + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { #endif -// we duplicate the function because other containers use the "or" term, makes API more consistent -BITSET_CONTAINER_FN(or, |, _mm256_or_si256, vorrq_u64) -BITSET_CONTAINER_FN(union, |, 
_mm256_or_si256, vorrq_u64) +// TODO: make simplified and optimized negation code across +// the full range. + +/* Negation across the entire range of the container. + * Compute the negation of src and write the result + * to *dst. The complement of a + * sufficiently sparse set will always be dense and a hence a bitmap +' * We assume that dst is pre-allocated and a valid bitset container + * There can be no in-place version. + */ +void array_container_negation(const array_container_t *src, +bitset_container_t *dst) { +uint64_t card = UINT64_C(1 << 16); +bitset_container_set_all(dst); + +if (src->cardinality == 0) { +return; +} + +dst->cardinality = (int32_t)bitset_clear_list(dst->words, card, src->array, +(uint64_t)src->cardinality); +} + +/* Negation across the entire range of the container + * Compute the negation of src and write the result + * to *dst. A true return value indicates a bitset result, + * otherwise the result is an array container. + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. + */ +bool bitset_container_negation( +const bitset_container_t *src, container_t **dst +){ +return bitset_container_negation_range(src, 0, (1 << 16), dst); +} + +/* inplace version */ +/* + * Same as bitset_container_negation except that if the output is to + * be a + * bitset_container_t, then src is modified and no allocation is made. + * If the output is to be an array_container_t, then caller is responsible + * to free the container. + * In all cases, the result is in *dst. + */ +bool bitset_container_negation_inplace( +bitset_container_t *src, container_t **dst +){ +return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst); +} + +/* Negation across the entire range of container + * Compute the negation of src and write the result + * to *dst. Return values are the *_TYPECODES as defined * in containers.h + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. 
+ */ +int run_container_negation(const run_container_t *src, container_t **dst) { +return run_container_negation_range(src, 0, (1 << 16), dst); +} + +/* + * Same as run_container_negation except that if the output is to + * be a + * run_container_t, and has the capacity to hold the result, + * then src is modified and no allocation is made. + * In all cases, the result is in *dst. + */ +int run_container_negation_inplace(run_container_t *src, container_t **dst) { +return run_container_negation_range_inplace(src, 0, (1 << 16), dst); +} + +/* Negation across a range of the container. + * Compute the negation of src and write the result + * to *dst. Returns true if the result is a bitset container + * and false for an array container. *dst is not preallocated. + */ +bool array_container_negation_range( +const array_container_t *src, +const int range_start, const int range_end, +container_t **dst +){ +/* close port of the Java implementation */ +if (range_start >= range_end) { +*dst = array_container_clone(src); +return false; +} + +int32_t start_index = +binarySearch(src->array, src->cardinality, (uint16_t)range_start); +if (start_index < 0) start_index = -start_index - 1; + +int32_t last_index = +binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1)); +if (last_index < 0) last_index = -last_index - 2; + +const int32_t current_values_in_range = last_index - start_index + 1; +const int32_t span_to_be_flipped = range_end - range_start; +const int32_t new_values_in_range = +span_to_be_flipped - current_values_in_range; +const int32_t cardinality_change = +new_values_in_range - current_values_in_range; +const int32_t new_cardinality = src->cardinality + cardinality_change; + +if (new_cardinality > DEFAULT_MAX_SIZE) { +bitset_container_t *temp = bitset_container_from_array(src); +bitset_flip_range(temp->words, (uint32_t)range_start, +(uint32_t)range_end); +temp->cardinality = new_cardinality; +*dst = temp; +return true; +} + +array_container_t *arr = 
+array_container_create_given_capacity(new_cardinality); +*dst = (container_t *)arr; +if(new_cardinality == 0) { +arr->cardinality = new_cardinality; +return false; // we are done. +} +// copy stuff before the active area +memcpy(arr->array, src->array, start_index * sizeof(uint16_t)); + +// work on the range +int32_t out_pos = start_index, in_pos = start_index; +int32_t val_in_range = range_start; +for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) { +if ((uint16_t)val_in_range != src->array[in_pos]) { +arr->array[out_pos++] = (uint16_t)val_in_range; +} else { +++in_pos; +} +} +for (; val_in_range < range_end; ++val_in_range) +arr->array[out_pos++] = (uint16_t)val_in_range; + +// content after the active range +memcpy(arr->array + out_pos, src->array + (last_index + 1), +(src->cardinality - (last_index + 1)) * sizeof(uint16_t)); +arr->cardinality = new_cardinality; +return false; +} + +/* Even when the result would fit, it is unclear how to make an + * inplace version without inefficient copying. + */ + +bool array_container_negation_range_inplace( +array_container_t *src, +const int range_start, const int range_end, +container_t **dst +){ +bool ans = array_container_negation_range(src, range_start, range_end, dst); +// TODO : try a real inplace version +array_container_free(src); +return ans; +} + +/* Negation across a range of the container + * Compute the negation of src and write the result + * to *dst. A true return value indicates a bitset result, + * otherwise the result is an array container. + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. + */ +bool bitset_container_negation_range( +const bitset_container_t *src, +const int range_start, const int range_end, +container_t **dst +){ +// TODO maybe consider density-based estimate +// and sometimes build result directly as array, with +// conversion back to bitset if wrong. 
Or determine +// actual result cardinality, then go directly for the known final cont. + +// keep computation using bitsets as long as possible. +bitset_container_t *t = bitset_container_clone(src); +bitset_flip_range(t->words, (uint32_t)range_start, (uint32_t)range_end); +t->cardinality = bitset_container_compute_cardinality(t); + +if (t->cardinality > DEFAULT_MAX_SIZE) { +*dst = t; +return true; +} else { +*dst = array_container_from_bitset(t); +bitset_container_free(t); +return false; +} +} + +/* inplace version */ +/* + * Same as bitset_container_negation except that if the output is to + * be a + * bitset_container_t, then src is modified and no allocation is made. + * If the output is to be an array_container_t, then caller is responsible + * to free the container. + * In all cases, the result is in *dst. + */ +bool bitset_container_negation_range_inplace( +bitset_container_t *src, +const int range_start, const int range_end, +container_t **dst +){ +bitset_flip_range(src->words, (uint32_t)range_start, (uint32_t)range_end); +src->cardinality = bitset_container_compute_cardinality(src); +if (src->cardinality > DEFAULT_MAX_SIZE) { +*dst = src; +return true; +} +*dst = array_container_from_bitset(src); +bitset_container_free(src); +return false; +} + +/* Negation across a range of container + * Compute the negation of src and write the result + * to *dst. Return values are the *_TYPECODES as defined * in containers.h + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. 
+ */ +int run_container_negation_range( +const run_container_t *src, +const int range_start, const int range_end, +container_t **dst +){ +uint8_t return_typecode; + +// follows the Java implementation +if (range_end <= range_start) { +*dst = run_container_clone(src); +return RUN_CONTAINER_TYPE; +} -// we duplicate the function because other containers use the "intersection" term, makes API more consistent -BITSET_CONTAINER_FN(and, &, _mm256_and_si256, vandq_u64) -BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64) +run_container_t *ans = run_container_create_given_capacity( +src->n_runs + 1); // src->n_runs + 1); +int k = 0; +for (; k < src->n_runs && src->runs[k].value < range_start; ++k) { +ans->runs[k] = src->runs[k]; +ans->n_runs++; +} -BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256, veorq_u64) -BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64) -// clang-format On +run_container_smart_append_exclusive( +ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); +for (; k < src->n_runs; ++k) { +run_container_smart_append_exclusive(ans, src->runs[k].value, +src->runs[k].length); +} +*dst = convert_run_to_efficient_container(ans, &return_typecode); +if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans); -int bitset_container_to_uint32_array( void *vout, const bitset_container_t *cont, uint32_t base) { -#ifdef USEAVX2FORDECODING - if(cont->cardinality >= 8192)// heuristic - return (int) bitset_extract_setbits_avx2(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,cont->cardinality,base); - else - return (int) bitset_extract_setbits(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,base); -#else - return (int) bitset_extract_setbits(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,base); -#endif +return return_typecode; } /* - * Print this container using printf (useful for debugging). 
+ * Same as run_container_negation except that if the output is to + * be a + * run_container_t, and has the capacity to hold the result, + * then src is modified and no allocation is made. + * In all cases, the result is in *dst. */ -void bitset_container_printf(const bitset_container_t * v) { - printf("{"); - uint32_t base = 0; - bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { - uint64_t w = v->array[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(iamfirst) {// predicted to be false - printf("%u",base + r); - iamfirst = false; - } else { - printf(",%u",base + r); - } - w ^= t; - } - base += 64; - } - printf("}"); +int run_container_negation_range_inplace( +run_container_t *src, +const int range_start, const int range_end, +container_t **dst +){ +uint8_t return_typecode; + +if (range_end <= range_start) { +*dst = src; +return RUN_CONTAINER_TYPE; } +// TODO: efficient special case when range is 0 to 65535 inclusive -/* - * Print this container using printf as a comma-separated list of 32-bit integers starting at base. - */ -void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint32_t base) { - bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { - uint64_t w = v->array[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(iamfirst) {// predicted to be false - printf("%u", r + base); - iamfirst = false; - } else { - printf(",%u",r + base); - } - w ^= t; - } - base += 64; - } -} +if (src->capacity == src->n_runs) { +// no excess room. 
More checking to see if result can fit +bool last_val_before_range = false; +bool first_val_in_range = false; +bool last_val_in_range = false; +bool first_val_past_range = false; +if (range_start > 0) +last_val_before_range = +run_container_contains(src, (uint16_t)(range_start - 1)); +first_val_in_range = run_container_contains(src, (uint16_t)range_start); -// TODO: use the fast lower bound, also -int bitset_container_number_of_runs(bitset_container_t *b) { - int num_runs = 0; - uint64_t next_word = b->array[0]; - - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS-1; ++i) { - uint64_t word = next_word; - next_word = b->array[i+1]; - num_runs += hamming((~word) & (word << 1)) + ( (word >> 63) & ~next_word); - } +if (last_val_before_range == first_val_in_range) { +last_val_in_range = +run_container_contains(src, (uint16_t)(range_end - 1)); +if (range_end != 0x10000) +first_val_past_range = +run_container_contains(src, (uint16_t)range_end); - uint64_t word = next_word; - num_runs += hamming((~word) & (word << 1)); - if((word & 0x8000000000000000ULL) != 0) - num_runs++; - return num_runs; +if (last_val_in_range == +first_val_past_range) { // no space for inplace +int ans = run_container_negation_range(src, range_start, +range_end, dst); +run_container_free(src); +return ans; +} } +} +// all other cases: result will fit + +run_container_t *ans = src; +int my_nbr_runs = src->n_runs; -int32_t bitset_container_serialize(const bitset_container_t *container, char *buf) { - int32_t l = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS; - memcpy(buf, container->array, l); - return(l); +ans->n_runs = 0; +int k = 0; +for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) { +// ans->runs[k] = src->runs[k]; (would be self-copy) +ans->n_runs++; } +// as with Java implementation, use locals to give self a buffer of depth 1 +rle16_t buffered = MAKE_RLE16(0, 0); +rle16_t next = buffered; +if (k < my_nbr_runs) buffered = src->runs[k]; 
+run_container_smart_append_exclusive( +ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); -int32_t bitset_container_write(const bitset_container_t *container, - char *buf) { - memcpy(buf, container->array, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); - return bitset_container_size_in_bytes(container); +for (; k < my_nbr_runs; ++k) { +if (k + 1 < my_nbr_runs) next = src->runs[k + 1]; + +run_container_smart_append_exclusive(ans, buffered.value, +buffered.length); +buffered = next; } +*dst = convert_run_to_efficient_container(ans, &return_typecode); +if (return_typecode != RUN_CONTAINER_TYPE) run_container_free(ans); -int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container, - const char *buf) { - container->cardinality = cardinality; - memcpy(container->array, buf, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); - return bitset_container_size_in_bytes(container); +return return_typecode; } -uint32_t bitset_container_serialization_len() { - return(sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); -} +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/containers/mixed_negation.c */ +/* begin file src/containers/mixed_subset.c */ -void* bitset_container_deserialize(const char *buf, size_t buf_len) { - bitset_container_t *ptr; - size_t l = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS; +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif - if(l != buf_len) - return(NULL); +bool array_container_is_subset_bitset(const array_container_t* container1, +const bitset_container_t* container2) { +if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { +if (container2->cardinality < container1->cardinality) { +return false; +} +} +for (int i = 0; i < container1->cardinality; ++i) { +if (!bitset_container_contains(container2, container1->array[i])) { +return false; +} +} +return true; +} - if((ptr = (bitset_container_t 
*)malloc(sizeof(bitset_container_t))) != NULL) { - memcpy(ptr, buf, sizeof(bitset_container_t)); - // sizeof(__m256i) == 32 - ptr->array = (uint64_t *) roaring_bitmap_aligned_malloc(32, l); - if (! ptr->array) { - free(ptr); - return NULL; - } - memcpy(ptr->array, buf, l); - ptr->cardinality = bitset_container_compute_cardinality(ptr); - } +bool run_container_is_subset_array(const run_container_t* container1, +const array_container_t* container2) { +if (run_container_cardinality(container1) > container2->cardinality) +return false; +int32_t start_pos = -1, stop_pos = -1; +for (int i = 0; i < container1->n_runs; ++i) { +int32_t start = container1->runs[i].value; +int32_t stop = start + container1->runs[i].length; +start_pos = advanceUntil(container2->array, stop_pos, +container2->cardinality, start); +stop_pos = advanceUntil(container2->array, stop_pos, +container2->cardinality, stop); +if (stop_pos == container2->cardinality) { +return false; +} else if (stop_pos - start_pos != stop - start || +container2->array[start_pos] != start || +container2->array[stop_pos] != stop) { +return false; +} +} +return true; +} - return((void*)ptr); +bool array_container_is_subset_run(const array_container_t* container1, +const run_container_t* container2) { +if (container1->cardinality > run_container_cardinality(container2)) +return false; +int i_array = 0, i_run = 0; +while (i_array < container1->cardinality && i_run < container2->n_runs) { +uint32_t start = container2->runs[i_run].value; +uint32_t stop = start + container2->runs[i_run].length; +if (container1->array[i_array] < start) { +return false; +} else if (container1->array[i_array] > stop) { +i_run++; +} else { // the value of the array is in the run +i_array++; +} +} +if (i_array == container1->cardinality) { +return true; +} else { +return false; +} } -bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr) { - for (int32_t i = 0; i < 
BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - uint64_t w = cont->array[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(!iterator(r + base, ptr)) return false; - w ^= t; - } - base += 64; - } - return true; +bool run_container_is_subset_bitset(const run_container_t* container1, +const bitset_container_t* container2) { +// todo: this code could be much faster +if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { +if (container2->cardinality < run_container_cardinality(container1)) { +return false; +} +} else { +int32_t card = bitset_container_compute_cardinality( +container2); // modify container2? +if (card < run_container_cardinality(container1)) { +return false; +} +} +for (int i = 0; i < container1->n_runs; ++i) { +uint32_t run_start = container1->runs[i].value; +uint32_t le = container1->runs[i].length; +for (uint32_t j = run_start; j <= run_start + le; ++j) { +if (!bitset_container_contains(container2, j)) { +return false; +} +} +} +return true; } -bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr) { - for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - uint64_t w = cont->array[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(!iterator(high_bits | (uint64_t)(r + base), ptr)) return false; - w ^= t; - } - base += 64; - } - return true; +bool bitset_container_is_subset_run(const bitset_container_t* container1, +const run_container_t* container2) { +// todo: this code could be much faster +if (container1->cardinality != BITSET_UNKNOWN_CARDINALITY) { +if (container1->cardinality > run_container_cardinality(container2)) { +return false; +} +} +int32_t i_bitset = 0, i_run = 0; +while (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS && +i_run < container2->n_runs) { +uint64_t w = container1->words[i_bitset]; +while (w != 0 && i_run < container2->n_runs) { +uint32_t start = 
container2->runs[i_run].value; +uint32_t stop = start + container2->runs[i_run].length; +uint64_t t = w & (~w + 1); +uint16_t r = i_bitset * 64 + roaring_trailing_zeroes(w); +if (r < start) { +return false; +} else if (r > stop) { +i_run++; +continue; +} else { +w ^= t; +} +} +if (w == 0) { +i_bitset++; +} else { +return false; +} +} +if (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS) { +// terminated iterating on the run containers, check that rest of bitset +// is empty +for (; i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS; i_bitset++) { +if (container1->words[i_bitset] != 0) { +return false; +} +} +} +return true; } +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/containers/mixed_subset.c */ +/* begin file src/containers/mixed_union.c */ +/* + * mixed_union.c + * + */ -bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) { - if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) { - if(container1->cardinality != container2->cardinality) { - return false; - } - if (container1->cardinality == INT32_C(0x10000)) { - return true; - } - } -#ifdef USEAVX - const __m256i *ptr1 = (const __m256i*)container1->array; - const __m256i *ptr2 = (const __m256i*)container2->array; - for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)/32; i++) { - __m256i r1 = _mm256_load_si256(ptr1+i); - __m256i r2 = _mm256_load_si256(ptr2+i); - int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2)); - if ((uint32_t)mask != UINT32_MAX) { - return false; - } - } -#else - return memcmp(container1->array, - container2->array, - BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)) == 0; +#include +#include + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { #endif - return true; + +/* Compute the union of src_1 and src_2 and write the result to + * dst. 
*/ +void array_bitset_container_union(const array_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst) { +if (src_2 != dst) bitset_container_copy(src_2, dst); +dst->cardinality = (int32_t)bitset_set_list_withcard( +dst->words, dst->cardinality, src_1->array, src_1->cardinality); } -bool bitset_container_is_subset(const bitset_container_t *container1, - const bitset_container_t *container2) { - if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) { - if(container1->cardinality > container2->cardinality) { - return false; - } - } - for(int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - if((container1->array[i] & container2->array[i]) != container1->array[i]) { - return false; - } - } - return true; +/* Compute the union of src_1 and src_2 and write the result to + * dst. It is allowed for src_2 to be dst. This version does not + * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). 
*/ +void array_bitset_container_lazy_union(const array_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst) { +if (src_2 != dst) bitset_container_copy(src_2, dst); +bitset_set_list(dst->words, src_1->array, src_1->cardinality); +dst->cardinality = BITSET_UNKNOWN_CARDINALITY; } -bool bitset_container_select(const bitset_container_t *container, uint32_t *start_rank, uint32_t rank, uint32_t *element) { - int card = bitset_container_cardinality(container); - if(rank >= *start_rank + card) { - *start_rank += card; - return false; - } - const uint64_t *array = container->array; - int32_t size; - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 1) { - size = hamming(array[i]); - if(rank <= *start_rank + size) { - uint64_t w = container->array[i]; - uint16_t base = i*64; - while (w != 0) { - uint64_t t = w & (~w + 1); - int r = __builtin_ctzll(w); - if(*start_rank == rank) { - *element = r+base; - return true; - } - w ^= t; - *start_rank += 1; - } - } - else - *start_rank += size; - } - assert(false); - __builtin_unreachable(); +void run_bitset_container_union(const run_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst) { +assert(!run_container_is_full(src_1)); // catch this case upstream +if (src_2 != dst) bitset_container_copy(src_2, dst); +for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { +rle16_t rle = src_1->runs[rlepos]; +bitset_set_lenrange(dst->words, rle.value, rle.length); +} +dst->cardinality = bitset_container_compute_cardinality(dst); } +void run_bitset_container_lazy_union(const run_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst) { +assert(!run_container_is_full(src_1)); // catch this case upstream +if (src_2 != dst) bitset_container_copy(src_2, dst); +for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { +rle16_t rle = src_1->runs[rlepos]; +bitset_set_lenrange(dst->words, rle.value, rle.length); +} +dst->cardinality = BITSET_UNKNOWN_CARDINALITY; 
+} -/* Returns the smallest value (assumes not empty) */ -uint16_t bitset_container_minimum(const bitset_container_t *container) { - for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { - uint64_t w = container->array[i]; - if (w != 0) { - int r = __builtin_ctzll(w); - return r + i * 64; - } - } - return UINT16_MAX; +// why do we leave the result as a run container?? +void array_run_container_union(const array_container_t *src_1, +const run_container_t *src_2, +run_container_t *dst) { +if (run_container_is_full(src_2)) { +run_container_copy(src_2, dst); +return; +} +// TODO: see whether the "2*" is spurious +run_container_grow(dst, 2 * (src_1->cardinality + src_2->n_runs), false); +int32_t rlepos = 0; +int32_t arraypos = 0; +rle16_t previousrle; +if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { +previousrle = run_container_append_first(dst, src_2->runs[rlepos]); +rlepos++; +} else { +previousrle = +run_container_append_value_first(dst, src_1->array[arraypos]); +arraypos++; +} +while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { +if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { +run_container_append(dst, src_2->runs[rlepos], &previousrle); +rlepos++; +} else { +run_container_append_value(dst, src_1->array[arraypos], +&previousrle); +arraypos++; +} +} +if (arraypos < src_1->cardinality) { +while (arraypos < src_1->cardinality) { +run_container_append_value(dst, src_1->array[arraypos], +&previousrle); +arraypos++; +} +} else { +while (rlepos < src_2->n_runs) { +run_container_append(dst, src_2->runs[rlepos], &previousrle); +rlepos++; +} +} } -/* Returns the largest value (assumes not empty) */ -uint16_t bitset_container_maximum(const bitset_container_t *container) { - for (int32_t i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i > 0; --i ) { - uint64_t w = container->array[i]; - if (w != 0) { - int r = __builtin_clzll(w); - return i * 64 + 63 - r; - } - } - return 0; +void array_run_container_inplace_union(const 
array_container_t *src_1, +run_container_t *src_2) { +if (run_container_is_full(src_2)) { +return; +} +const int32_t maxoutput = src_1->cardinality + src_2->n_runs; +const int32_t neededcapacity = maxoutput + src_2->n_runs; +if (src_2->capacity < neededcapacity) +run_container_grow(src_2, neededcapacity, true); +memmove(src_2->runs + maxoutput, src_2->runs, +src_2->n_runs * sizeof(rle16_t)); +rle16_t *inputsrc2 = src_2->runs + maxoutput; +int32_t rlepos = 0; +int32_t arraypos = 0; +int src2nruns = src_2->n_runs; +src_2->n_runs = 0; + +rle16_t previousrle; + +if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { +previousrle = run_container_append_first(src_2, inputsrc2[rlepos]); +rlepos++; +} else { +previousrle = +run_container_append_value_first(src_2, src_1->array[arraypos]); +arraypos++; +} + +while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) { +if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { +run_container_append(src_2, inputsrc2[rlepos], &previousrle); +rlepos++; +} else { +run_container_append_value(src_2, src_1->array[arraypos], +&previousrle); +arraypos++; +} +} +if (arraypos < src_1->cardinality) { +while (arraypos < src_1->cardinality) { +run_container_append_value(src_2, src_1->array[arraypos], +&previousrle); +arraypos++; +} +} else { +while (rlepos < src2nruns) { +run_container_append(src_2, inputsrc2[rlepos], &previousrle); +rlepos++; +} +} +} + +bool array_array_container_union( +const array_container_t *src_1, const array_container_t *src_2, +container_t **dst +){ +int totalCardinality = src_1->cardinality + src_2->cardinality; +if (totalCardinality <= DEFAULT_MAX_SIZE) { +*dst = array_container_create_given_capacity(totalCardinality); +if (*dst != NULL) { +array_container_union(src_1, src_2, CAST_array(*dst)); +} else { +return true; // otherwise failure won't be caught +} +return false; // not a bitset +} +*dst = bitset_container_create(); +bool returnval = true; // expect a bitset +if (*dst != NULL) { 
+bitset_container_t *ourbitset = CAST_bitset(*dst); +bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); +ourbitset->cardinality = (int32_t)bitset_set_list_withcard( +ourbitset->words, src_1->cardinality, src_2->array, +src_2->cardinality); +if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { +// need to convert! +*dst = array_container_from_bitset(ourbitset); +bitset_container_free(ourbitset); +returnval = false; // not going to be a bitset +} +} +return returnval; +} + +bool array_array_container_inplace_union( +array_container_t *src_1, const array_container_t *src_2, +container_t **dst +){ +int totalCardinality = src_1->cardinality + src_2->cardinality; +*dst = NULL; +if (totalCardinality <= DEFAULT_MAX_SIZE) { +if(src_1->capacity < totalCardinality) { +*dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous +if (*dst != NULL) { +array_container_union(src_1, src_2, CAST_array(*dst)); +} else { +return true; // otherwise failure won't be caught +} +return false; // not a bitset +} else { +memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t)); +// In theory, we could use fast_union_uint16, but it is unsafe. It fails +// with Intel compilers in particular. 
+// https://github.com/RoaringBitmap/CRoaring/pull/452 +// See report https://github.com/RoaringBitmap/CRoaring/issues/476 +src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality, +src_2->array, src_2->cardinality, src_1->array); +return false; // not a bitset +} +} +*dst = bitset_container_create(); +bool returnval = true; // expect a bitset +if (*dst != NULL) { +bitset_container_t *ourbitset = CAST_bitset(*dst); +bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); +ourbitset->cardinality = (int32_t)bitset_set_list_withcard( +ourbitset->words, src_1->cardinality, src_2->array, +src_2->cardinality); +if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { +// need to convert! +if(src_1->capacity < ourbitset->cardinality) { +array_container_grow(src_1, ourbitset->cardinality, false); +} + +bitset_extract_setbits_uint16(ourbitset->words, BITSET_CONTAINER_SIZE_IN_WORDS, +src_1->array, 0); +src_1->cardinality = ourbitset->cardinality; +*dst = src_1; +bitset_container_free(ourbitset); +returnval = false; // not going to be a bitset +} +} +return returnval; +} + + +bool array_array_container_lazy_union( +const array_container_t *src_1, const array_container_t *src_2, +container_t **dst +){ +int totalCardinality = src_1->cardinality + src_2->cardinality; +// +// We assume that operations involving bitset containers will be faster than +// operations involving solely array containers, except maybe when array containers +// are small. Indeed, for example, it is cheap to compute the union between an array and +// a bitset container, generally more so than between a large array and another array. +// So it is advantageous to favour bitset containers during the computation. 
+// Of course, if we convert array containers eagerly to bitset containers, we may later +// need to revert the bitset containers to array containerr to satisfy the Roaring format requirements, +// but such one-time conversions at the end may not be overly expensive. We arrived to this design +// based on extensive benchmarking. +// +if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { +*dst = array_container_create_given_capacity(totalCardinality); +if (*dst != NULL) { +array_container_union(src_1, src_2, CAST_array(*dst)); +} else { +return true; // otherwise failure won't be caught +} +return false; // not a bitset +} +*dst = bitset_container_create(); +bool returnval = true; // expect a bitset +if (*dst != NULL) { +bitset_container_t *ourbitset = CAST_bitset(*dst); +bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); +bitset_set_list(ourbitset->words, src_2->array, src_2->cardinality); +ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; +} +return returnval; } -/* Returns the number of values equal or smaller than x */ -int bitset_container_rank(const bitset_container_t *container, uint16_t x) { - // credit: aqrit - int sum = 0; - int i = 0; - for (int end = x / 64; i < end; i++){ - sum += hamming(container->array[i]); - } - uint64_t lastword = container->array[i]; - uint64_t lastpos = UINT64_C(1) << (x % 64); - uint64_t mask = lastpos + lastpos - 1; // smear right - sum += hamming(lastword & mask); - return sum; + +bool array_array_container_lazy_inplace_union( +array_container_t *src_1, const array_container_t *src_2, +container_t **dst +){ +int totalCardinality = src_1->cardinality + src_2->cardinality; +*dst = NULL; +// +// We assume that operations involving bitset containers will be faster than +// operations involving solely array containers, except maybe when array containers +// are small. 
Indeed, for example, it is cheap to compute the union between an array and +// a bitset container, generally more so than between a large array and another array. +// So it is advantageous to favour bitset containers during the computation. +// Of course, if we convert array containers eagerly to bitset containers, we may later +// need to revert the bitset containers to array containerr to satisfy the Roaring format requirements, +// but such one-time conversions at the end may not be overly expensive. We arrived to this design +// based on extensive benchmarking. +// +if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { +if(src_1->capacity < totalCardinality) { +*dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous +if (*dst != NULL) { +array_container_union(src_1, src_2, CAST_array(*dst)); +} else { +return true; // otherwise failure won't be caught +} +return false; // not a bitset +} else { +memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t)); +/* + Next line is safe: + + We just need to focus on the reading and writing performed on array1. In `union_vector16`, both vectorized and scalar code still obey the basic rule: read from two inputs, do the union, and then write the output. + + Let's say the length(cardinality) of input2 is L2: + ``` + |<- L2 ->| + array1: [output--- |input 1---|---] + array2: [input 2---] + ``` + Let's define 3 __m128i pointers, `pos1` starts from `input1`, `pos2` starts from `input2`, these 2 point at the next byte to read, `out` starts from `output`, pointing at the next byte to overwrite. 
+ ``` + array1: [output--- |input 1---|---] + ^ ^ + out pos1 + array2: [input 2---] + ^ + pos2 + ``` + The union output always contains less or equal number of elements than all inputs added, so we have: + ``` + out <= pos1 + pos2 + ``` + therefore: + ``` + out <= pos1 + L2 + ``` + which means you will not overwrite data beyond pos1, so the data haven't read is safe, and we don't care the data already read. + */ +src_1->cardinality = (int32_t)fast_union_uint16(src_1->array + src_2->cardinality, src_1->cardinality, +src_2->array, src_2->cardinality, src_1->array); +return false; // not a bitset +} +} +*dst = bitset_container_create(); +bool returnval = true; // expect a bitset +if (*dst != NULL) { +bitset_container_t *ourbitset = CAST_bitset(*dst); +bitset_set_list(ourbitset->words, src_1->array, src_1->cardinality); +bitset_set_list(ourbitset->words, src_2->array, src_2->cardinality); +ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; +} +return returnval; +} + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/containers/mixed_union.c */ +/* begin file src/containers/mixed_xor.c */ +/* + * mixed_xor.c + */ + +#include +#include + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + +/* Compute the xor of src_1 and src_2 and write the result to + * dst (which has no container initially). + * Result is true iff dst is a bitset */ +bool array_bitset_container_xor( +const array_container_t *src_1, const bitset_container_t *src_2, +container_t **dst +){ +bitset_container_t *result = bitset_container_create(); +bitset_container_copy(src_2, result); +result->cardinality = (int32_t)bitset_flip_list_withcard( +result->words, result->cardinality, src_1->array, src_1->cardinality); + +// do required type conversions. 
+if (result->cardinality <= DEFAULT_MAX_SIZE) { +*dst = array_container_from_bitset(result); +bitset_container_free(result); +return false; // not bitset +} +*dst = result; +return true; // bitset } -/* Returns the index of the first value equal or larger than x, or -1 */ -int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x) { - uint32_t x32 = x; - uint32_t k = x32 / 64; - uint64_t word = container->array[k]; - const int diff = x32 - k * 64; // in [0,64) - word = (word >> diff) << diff; // a mask is faster, but we don't care - while(word == 0) { - k++; - if(k == BITSET_CONTAINER_SIZE_IN_WORDS) return -1; - word = container->array[k]; - } - return k * 64 + __builtin_ctzll(word); +/* Compute the xor of src_1 and src_2 and write the result to + * dst. It is allowed for src_2 to be dst. This version does not + * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). + */ + +void array_bitset_container_lazy_xor(const array_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst) { +if (src_2 != dst) bitset_container_copy(src_2, dst); +bitset_flip_list(dst->words, src_1->array, src_1->cardinality); +dst->cardinality = BITSET_UNKNOWN_CARDINALITY; } -/* end file src/containers/bitset.c */ -/* begin file src/containers/containers.c */ +/* Compute the xor of src_1 and src_2 and write the result to + * dst. Result may be either a bitset or an array container + * (returns "result is bitset"). dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. 
+ */ -extern inline const void *container_unwrap_shared( - const void *candidate_shared_container, uint8_t *type); -extern inline void *container_mutable_unwrap_shared( - void *candidate_shared_container, uint8_t *type); +bool run_bitset_container_xor( +const run_container_t *src_1, const bitset_container_t *src_2, +container_t **dst +){ +bitset_container_t *result = bitset_container_create(); -extern inline const char *get_container_name(uint8_t typecode); +bitset_container_copy(src_2, result); +for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { +rle16_t rle = src_1->runs[rlepos]; +bitset_flip_range(result->words, rle.value, +rle.value + rle.length + UINT32_C(1)); +} +result->cardinality = bitset_container_compute_cardinality(result); -extern inline int container_get_cardinality(const void *container, uint8_t typecode); +if (result->cardinality <= DEFAULT_MAX_SIZE) { +*dst = array_container_from_bitset(result); +bitset_container_free(result); +return false; // not bitset +} +*dst = result; +return true; // bitset +} -extern inline void *container_iand(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); +/* lazy xor. Dst is initialized and may be equal to src_2. + * Result is left as a bitset container, even if actual + * cardinality would dictate an array container. 
+ */ -extern inline void *container_ior(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); +void run_bitset_container_lazy_xor(const run_container_t *src_1, +const bitset_container_t *src_2, +bitset_container_t *dst) { +if (src_2 != dst) bitset_container_copy(src_2, dst); +for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { +rle16_t rle = src_1->runs[rlepos]; +bitset_flip_range(dst->words, rle.value, +rle.value + rle.length + UINT32_C(1)); +} +dst->cardinality = BITSET_UNKNOWN_CARDINALITY; +} -extern inline void *container_ixor(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. + */ -extern inline void *container_iandnot(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); +int array_run_container_xor( +const array_container_t *src_1, const run_container_t *src_2, +container_t **dst +){ +// semi following Java XOR implementation as of May 2016 +// the C OR implementation works quite differently and can return a run +// container +// TODO could optimize for full run containers. + +// use of lazy following Java impl. +const int arbitrary_threshold = 32; +if (src_1->cardinality < arbitrary_threshold) { +run_container_t *ans = run_container_create(); +array_run_container_lazy_xor(src_1, src_2, ans); // keeps runs. +uint8_t typecode_after; +*dst = +convert_run_to_efficient_container_and_free(ans, &typecode_after); +return typecode_after; +} + +int card = run_container_cardinality(src_2); +if (card <= DEFAULT_MAX_SIZE) { +// Java implementation works with the array, xoring the run elements via +// iterator +array_container_t *temp = array_container_from_run(src_2); +bool ret_is_bitset = array_array_container_xor(temp, src_1, dst); +array_container_free(temp); +return ret_is_bitset ? 
BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE; + +} else { // guess that it will end up as a bitset +bitset_container_t *result = bitset_container_from_run(src_2); +bool is_bitset = bitset_array_container_ixor(result, src_1, dst); +// any necessary type conversion has been done by the ixor +int retval = (is_bitset ? BITSET_CONTAINER_TYPE +: ARRAY_CONTAINER_TYPE); +return retval; +} +} -void container_free(void *container, uint8_t typecode) { - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - bitset_container_free((bitset_container_t *)container); - break; - case ARRAY_CONTAINER_TYPE_CODE: - array_container_free((array_container_t *)container); - break; - case RUN_CONTAINER_TYPE_CODE: - run_container_free((run_container_t *)container); - break; - case SHARED_CONTAINER_TYPE_CODE: - shared_container_free((shared_container_t *)container); - break; - default: - assert(false); - __builtin_unreachable(); - } +/* Dst is a valid run container. (Can it be src_2? Let's say not.) + * Leaves result as run container, even if other options are + * smaller. 
+ */ + +void array_run_container_lazy_xor(const array_container_t *src_1, +const run_container_t *src_2, +run_container_t *dst) { +run_container_grow(dst, src_1->cardinality + src_2->n_runs, false); +int32_t rlepos = 0; +int32_t arraypos = 0; +dst->n_runs = 0; + +while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { +if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { +run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, +src_2->runs[rlepos].length); +rlepos++; +} else { +run_container_smart_append_exclusive(dst, src_1->array[arraypos], +0); +arraypos++; +} +} +while (arraypos < src_1->cardinality) { +run_container_smart_append_exclusive(dst, src_1->array[arraypos], 0); +arraypos++; +} +while (rlepos < src_2->n_runs) { +run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, +src_2->runs[rlepos].length); +rlepos++; +} } -void container_printf(const void *container, uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - bitset_container_printf((const bitset_container_t *)container); - return; - case ARRAY_CONTAINER_TYPE_CODE: - array_container_printf((const array_container_t *)container); - return; - case RUN_CONTAINER_TYPE_CODE: - run_container_printf((const run_container_t *)container); - return; - default: - __builtin_unreachable(); - } +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. 
+ */ + +int run_run_container_xor( +const run_container_t *src_1, const run_container_t *src_2, +container_t **dst +){ +run_container_t *ans = run_container_create(); +run_container_xor(src_1, src_2, ans); +uint8_t typecode_after; +*dst = convert_run_to_efficient_container_and_free(ans, &typecode_after); +return typecode_after; } -void container_printf_as_uint32_array(const void *container, uint8_t typecode, - uint32_t base) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - bitset_container_printf_as_uint32_array( - (const bitset_container_t *)container, base); - return; - case ARRAY_CONTAINER_TYPE_CODE: - array_container_printf_as_uint32_array( - (const array_container_t *)container, base); - return; - case RUN_CONTAINER_TYPE_CODE: - run_container_printf_as_uint32_array( - (const run_container_t *)container, base); - return; - return; - default: - __builtin_unreachable(); - } +/* + * Java implementation (as of May 2016) for array_run, run_run + * and bitset_run don't do anything different for inplace. + * Could adopt the mixed_union.c approach instead (ie, using + * smart_append_exclusive) + * + */ + +bool array_array_container_xor( +const array_container_t *src_1, const array_container_t *src_2, +container_t **dst +){ +int totalCardinality = +src_1->cardinality + src_2->cardinality; // upper bound +if (totalCardinality <= DEFAULT_MAX_SIZE) { +*dst = array_container_create_given_capacity(totalCardinality); +array_container_xor(src_1, src_2, CAST_array(*dst)); +return false; // not a bitset +} +*dst = bitset_container_from_array(src_1); +bool returnval = true; // expect a bitset +bitset_container_t *ourbitset = CAST_bitset(*dst); +ourbitset->cardinality = (uint32_t)bitset_flip_list_withcard( +ourbitset->words, src_1->cardinality, src_2->array, src_2->cardinality); +if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { +// need to convert! 
+*dst = array_container_from_bitset(ourbitset); +bitset_container_free(ourbitset); +returnval = false; // not going to be a bitset +} + +return returnval; +} + +bool array_array_container_lazy_xor( +const array_container_t *src_1, const array_container_t *src_2, +container_t **dst +){ +int totalCardinality = src_1->cardinality + src_2->cardinality; +// +// We assume that operations involving bitset containers will be faster than +// operations involving solely array containers, except maybe when array containers +// are small. Indeed, for example, it is cheap to compute the exclusive union between an array and +// a bitset container, generally more so than between a large array and another array. +// So it is advantageous to favour bitset containers during the computation. +// Of course, if we convert array containers eagerly to bitset containers, we may later +// need to revert the bitset containers to array containerr to satisfy the Roaring format requirements, +// but such one-time conversions at the end may not be overly expensive. We arrived to this design +// based on extensive benchmarking on unions. +// For XOR/exclusive union, we simply followed the heuristic used by the unions (see mixed_union.c). +// Further tuning is possible. +// +if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { +*dst = array_container_create_given_capacity(totalCardinality); +if (*dst != NULL) +array_container_xor(src_1, src_2, CAST_array(*dst)); +return false; // not a bitset +} +*dst = bitset_container_from_array(src_1); +bool returnval = true; // expect a bitset (maybe, for XOR??) 
+if (*dst != NULL) { +bitset_container_t *ourbitset = CAST_bitset(*dst); +bitset_flip_list(ourbitset->words, src_2->array, src_2->cardinality); +ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; +} +return returnval; } -int32_t container_serialize(const void *container, uint8_t typecode, - char *buf) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return (bitset_container_serialize((const bitset_container_t *)container, - buf)); - case ARRAY_CONTAINER_TYPE_CODE: - return ( - array_container_serialize((const array_container_t *)container, buf)); - case RUN_CONTAINER_TYPE_CODE: - return (run_container_serialize((const run_container_t *)container, buf)); - default: - assert(0); - __builtin_unreachable(); - return (-1); - } +/* Compute the xor of src_1 and src_2 and write the result to + * dst (which has no container initially). Return value is + * "dst is a bitset" + */ + +bool bitset_bitset_container_xor( +const bitset_container_t *src_1, const bitset_container_t *src_2, +container_t **dst +){ +bitset_container_t *ans = bitset_container_create(); +int card = bitset_container_xor(src_1, src_2, ans); +if (card <= DEFAULT_MAX_SIZE) { +*dst = array_container_from_bitset(ans); +bitset_container_free(ans); +return false; // not bitset +} else { +*dst = ans; +return true; +} } -uint32_t container_serialization_len(const void *container, uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_serialization_len(); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_serialization_len( - (const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_serialization_len( - (const run_container_t *)container); - default: - assert(0); - __builtin_unreachable(); - return (0); - } +/* Compute the xor of src_1 and src_2 and write the result to + * dst (which has no 
container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. + * Returns true iff dst is a bitset */ + +bool bitset_array_container_ixor( +bitset_container_t *src_1, const array_container_t *src_2, +container_t **dst +){ +*dst = src_1; +src_1->cardinality = (uint32_t)bitset_flip_list_withcard( +src_1->words, src_1->cardinality, src_2->array, src_2->cardinality); + +if (src_1->cardinality <= DEFAULT_MAX_SIZE) { +*dst = array_container_from_bitset(src_1); +bitset_container_free(src_1); +return false; // not bitset +} else +return true; +} + +/* a bunch of in-place, some of which may not *really* be inplace. + * TODO: write actual inplace routine if efficiency warrants it + * Anything inplace with a bitset is a good candidate + */ + +bool bitset_bitset_container_ixor( +bitset_container_t *src_1, const bitset_container_t *src_2, +container_t **dst +){ +int card = bitset_container_xor(src_1, src_2, src_1); +if (card <= DEFAULT_MAX_SIZE) { +*dst = array_container_from_bitset(src_1); +bitset_container_free(src_1); +return false; // not bitset +} else { +*dst = src_1; +return true; +} } -void *container_deserialize(uint8_t typecode, const char *buf, size_t buf_len) { - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return (bitset_container_deserialize(buf, buf_len)); - case ARRAY_CONTAINER_TYPE_CODE: - return (array_container_deserialize(buf, buf_len)); - case RUN_CONTAINER_TYPE_CODE: - return (run_container_deserialize(buf, buf_len)); - case SHARED_CONTAINER_TYPE_CODE: - printf("this should never happen.\n"); - assert(0); - __builtin_unreachable(); - return (NULL); - default: - assert(0); - __builtin_unreachable(); - return (NULL); - } +bool array_bitset_container_ixor( +array_container_t *src_1, const bitset_container_t *src_2, +container_t **dst +){ +bool ans = array_bitset_container_xor(src_1, 
src_2, dst); +array_container_free(src_1); +return ans; +} + +/* Compute the xor of src_1 and src_2 and write the result to + * dst. Result may be either a bitset or an array container + * (returns "result is bitset"). dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. + */ + +bool run_bitset_container_ixor( +run_container_t *src_1, const bitset_container_t *src_2, +container_t **dst +){ +bool ans = run_bitset_container_xor(src_1, src_2, dst); +run_container_free(src_1); +return ans; } -extern inline bool container_nonzero_cardinality(const void *container, - uint8_t typecode); +bool bitset_run_container_ixor( +bitset_container_t *src_1, const run_container_t *src_2, +container_t **dst +){ +bool ans = run_bitset_container_xor(src_2, src_1, dst); +bitset_container_free(src_1); +return ans; +} +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. + */ -extern inline int container_to_uint32_array(uint32_t *output, const void *container, - uint8_t typecode, uint32_t base); +int array_run_container_ixor( +array_container_t *src_1, const run_container_t *src_2, +container_t **dst +){ +int ans = array_run_container_xor(src_1, src_2, dst); +array_container_free(src_1); +return ans; +} -extern inline void *container_add(void *container, uint16_t val, uint8_t typecode, - uint8_t *new_typecode); +int run_array_container_ixor( +run_container_t *src_1, const array_container_t *src_2, +container_t **dst +){ +int ans = array_run_container_xor(src_2, src_1, dst); +run_container_free(src_1); +return ans; +} -extern inline bool container_contains(const void *container, uint16_t val, - uint8_t typecode); +bool array_array_container_ixor( +array_container_t *src_1, const array_container_t *src_2, +container_t **dst +){ +bool ans = array_array_container_xor(src_1, src_2, dst); +array_container_free(src_1); +return ans; +} -extern inline void 
*container_clone(const void *container, uint8_t typecode); +int run_run_container_ixor( +run_container_t *src_1, const run_container_t *src_2, +container_t **dst +){ +int ans = run_run_container_xor(src_1, src_2, dst); +run_container_free(src_1); +return ans; +} -extern inline void *container_and(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/containers/mixed_xor.c */ +/* begin file src/containers/run.c */ +#include +#include -extern inline void *container_or(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); -extern inline void *container_xor(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); +#if CROARING_IS_X64 +#ifndef CROARING_COMPILER_SUPPORTS_AVX512 +#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." +#endif // CROARING_COMPILER_SUPPORTS_AVX512 +#endif -void *get_copy_of_container(void *container, uint8_t *typecode, - bool copy_on_write) { - if (copy_on_write) { - shared_container_t *shared_container; - if (*typecode == SHARED_CONTAINER_TYPE_CODE) { - shared_container = (shared_container_t *)container; - shared_container->counter += 1; - return shared_container; - } - assert(*typecode != SHARED_CONTAINER_TYPE_CODE); +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif - if ((shared_container = (shared_container_t *)malloc( - sizeof(shared_container_t))) == NULL) { - return NULL; - } +extern inline uint16_t run_container_minimum(const run_container_t *run); +extern inline uint16_t run_container_maximum(const run_container_t *run); +extern inline int32_t interleavedBinarySearch(const rle16_t *array, +int32_t lenarray, uint16_t ikey); +extern inline bool run_container_contains(const run_container_t *run, +uint16_t pos); +extern inline int run_container_index_equalorlarger(const run_container_t *arr, 
uint16_t x); +extern inline bool run_container_is_full(const run_container_t *run); +extern inline bool run_container_nonzero_cardinality(const run_container_t *rc); +extern inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs); +extern inline run_container_t *run_container_create_range(uint32_t start, +uint32_t stop); +extern inline int run_container_cardinality(const run_container_t *run); - shared_container->container = container; - shared_container->typecode = *typecode; - shared_container->counter = 2; - *typecode = SHARED_CONTAINER_TYPE_CODE; +bool run_container_add(run_container_t *run, uint16_t pos) { +int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); +if (index >= 0) return false; // already there +index = -index - 2; // points to preceding value, possibly -1 +if (index >= 0) { // possible match +int32_t offset = pos - run->runs[index].value; +int32_t le = run->runs[index].length; +if (offset <= le) return false; // already there +if (offset == le + 1) { +// we may need to fuse +if (index + 1 < run->n_runs) { +if (run->runs[index + 1].value == pos + 1) { +// indeed fusion is needed +run->runs[index].length = run->runs[index + 1].value + +run->runs[index + 1].length - +run->runs[index].value; +recoverRoomAtIndex(run, (uint16_t)(index + 1)); +return true; +} +} +run->runs[index].length++; +return true; +} +if (index + 1 < run->n_runs) { +// we may need to fuse +if (run->runs[index + 1].value == pos + 1) { +// indeed fusion is needed +run->runs[index + 1].value = pos; +run->runs[index + 1].length = run->runs[index + 1].length + 1; +return true; +} +} +} +if (index == -1) { +// we may need to extend the first run +if (0 < run->n_runs) { +if (run->runs[0].value == pos + 1) { +run->runs[0].length++; +run->runs[0].value--; +return true; +} +} +} +makeRoomAtIndex(run, (uint16_t)(index + 1)); +run->runs[index + 1].value = pos; +run->runs[index + 1].length = 0; +return true; +} - return shared_container; - } // copy_on_write - 
// otherwise, no copy on write... - const void *actualcontainer = - container_unwrap_shared((const void *)container, typecode); - assert(*typecode != SHARED_CONTAINER_TYPE_CODE); - return container_clone(actualcontainer, *typecode); +/* Create a new run container. Return NULL in case of failure. */ +run_container_t *run_container_create_given_capacity(int32_t size) { +run_container_t *run; +/* Allocate the run container itself. */ +if ((run = (run_container_t *)roaring_malloc(sizeof(run_container_t))) == NULL) { +return NULL; } -/** - * Copies a container, requires a typecode. This allocates new memory, caller - * is responsible for deallocation. - */ -void *container_clone(const void *container, uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_clone((const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_clone((const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_clone((const run_container_t *)container); - case SHARED_CONTAINER_TYPE_CODE: - printf("shared containers are not cloneable\n"); - assert(false); - return NULL; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } +if (size <= 0 ) { // we don't want to rely on malloc(0) +run->runs = NULL; +} else if ((run->runs = (rle16_t *)roaring_malloc(sizeof(rle16_t) * size)) == NULL) { +roaring_free(run); +return NULL; } - -void *shared_container_extract_copy(shared_container_t *container, - uint8_t *typecode) { - assert(container->counter > 0); - assert(container->typecode != SHARED_CONTAINER_TYPE_CODE); - container->counter--; - *typecode = container->typecode; - void *answer; - if (container->counter == 0) { - answer = container->container; - container->container = NULL; // paranoid - free(container); - } else { - answer = container_clone(container->container, *typecode); - } - assert(*typecode != 
SHARED_CONTAINER_TYPE_CODE); - return answer; +run->capacity = size; +run->n_runs = 0; +return run; } -void shared_container_free(shared_container_t *container) { - assert(container->counter > 0); - container->counter--; - if (container->counter == 0) { - assert(container->typecode != SHARED_CONTAINER_TYPE_CODE); - container_free(container->container, container->typecode); - container->container = NULL; // paranoid - free(container); - } +int run_container_shrink_to_fit(run_container_t *src) { +if (src->n_runs == src->capacity) return 0; // nothing to do +int savings = src->capacity - src->n_runs; +src->capacity = src->n_runs; +rle16_t *oldruns = src->runs; +src->runs = (rle16_t *)roaring_realloc(oldruns, src->capacity * sizeof(rle16_t)); +if (src->runs == NULL) roaring_free(oldruns); // should never happen? +return savings; +} +/* Create a new run container. Return NULL in case of failure. */ +run_container_t *run_container_create(void) { +return run_container_create_given_capacity(RUN_DEFAULT_INIT_SIZE); } -extern inline void *container_not(const void *c1, uint8_t type1, uint8_t *result_type); - -extern inline void *container_not_range(const void *c1, uint8_t type1, - uint32_t range_start, uint32_t range_end, - uint8_t *result_type); - -extern inline void *container_inot(void *c1, uint8_t type1, uint8_t *result_type); - -extern inline void *container_inot_range(void *c1, uint8_t type1, uint32_t range_start, - uint32_t range_end, uint8_t *result_type); +run_container_t *run_container_clone(const run_container_t *src) { +run_container_t *run = run_container_create_given_capacity(src->capacity); +if (run == NULL) return NULL; +run->capacity = src->capacity; +run->n_runs = src->n_runs; +memcpy(run->runs, src->runs, src->n_runs * sizeof(rle16_t)); +return run; +} -extern inline void *container_range_of_ones(uint32_t range_start, uint32_t range_end, - uint8_t *result_type); +void run_container_offset(const run_container_t *c, +container_t **loc, container_t **hic, 
+uint16_t offset) { +run_container_t *lo = NULL, *hi = NULL; -// where are the correponding things for union and intersection?? -extern inline void *container_lazy_xor(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); +bool split; +int lo_cap, hi_cap; +int top, pivot; -extern inline void *container_lazy_ixor(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); +top = (1 << 16) - offset; +pivot = run_container_index_equalorlarger(c, top); -extern inline void *container_andnot(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type); -/* end file src/containers/containers.c */ -/* begin file src/containers/convert.c */ -#include +if (pivot == -1) { +split = false; +lo_cap = c->n_runs; +hi_cap = 0; +} else { +split = c->runs[pivot].value < top; +lo_cap = pivot + (split ? 1 : 0); +hi_cap = c->n_runs - pivot; +} +if (loc && lo_cap) { +lo = run_container_create_given_capacity(lo_cap); +memcpy(lo->runs, c->runs, lo_cap*sizeof(rle16_t)); +lo->n_runs = lo_cap; +for (int i = 0; i < lo_cap; ++i) { +lo->runs[i].value += offset; +} +*loc = (container_t*)lo; +} -// file contains grubby stuff that must know impl. details of all container -// types. 
-bitset_container_t *bitset_container_from_array(const array_container_t *a) { - bitset_container_t *ans = bitset_container_create(); - int limit = array_container_cardinality(a); - for (int i = 0; i < limit; ++i) bitset_container_set(ans, a->array[i]); - return ans; +if (hic && hi_cap) { +hi = run_container_create_given_capacity(hi_cap); +memcpy(hi->runs, c->runs+pivot, hi_cap*sizeof(rle16_t)); +hi->n_runs = hi_cap; +for (int i = 0; i < hi_cap; ++i) { +hi->runs[i].value += offset; +} +*hic = (container_t*)hi; } -bitset_container_t *bitset_container_from_run(const run_container_t *arr) { - int card = run_container_cardinality(arr); - bitset_container_t *answer = bitset_container_create(); - for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) { - rle16_t vl = arr->runs[rlepos]; - bitset_set_lenrange(answer->array, vl.value, vl.length); - } - answer->cardinality = card; - return answer; +// Fix the split. +if (split) { +if (lo != NULL) { +// Add the missing run to 'lo', exhausting length. +lo->runs[lo->n_runs-1].length = (1 << 16) - lo->runs[lo->n_runs-1].value - 1; } -array_container_t *array_container_from_run(const run_container_t *arr) { - array_container_t *answer = - array_container_create_given_capacity(run_container_cardinality(arr)); - answer->cardinality = 0; - for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) { - int run_start = arr->runs[rlepos].value; - int run_end = run_start + arr->runs[rlepos].length; - - for (int run_value = run_start; run_value <= run_end; ++run_value) { - answer->array[answer->cardinality++] = (uint16_t)run_value; - } - } - return answer; +if (hi != NULL) { +// Fix the first run in 'hi'. 
+hi->runs[0].length -= UINT16_MAX - hi->runs[0].value + 1; +hi->runs[0].value = 0; +} +} } -array_container_t *array_container_from_bitset(const bitset_container_t *bits) { - array_container_t *result = - array_container_create_given_capacity(bits->cardinality); - result->cardinality = bits->cardinality; - // sse version ends up being slower here - // (bitset_extract_setbits_sse_uint16) - // because of the sparsity of the data - bitset_extract_setbits_uint16(bits->array, BITSET_CONTAINER_SIZE_IN_WORDS, - result->array, 0); - return result; +/* Free memory. */ +void run_container_free(run_container_t *run) { +if(run->runs != NULL) {// Jon Strabala reports that some tools complain otherwise +roaring_free(run->runs); +run->runs = NULL; // pedantic +} +roaring_free(run); } -/* assumes that container has adequate space. Run from [s,e] (inclusive) */ -static void add_run(run_container_t *r, int s, int e) { - r->runs[r->n_runs].value = s; - r->runs[r->n_runs].length = e - s; - r->n_runs++; +void run_container_grow(run_container_t *run, int32_t min, bool copy) { +int32_t newCapacity = +(run->capacity == 0) +? RUN_DEFAULT_INIT_SIZE +: run->capacity < 64 ? run->capacity * 2 +: run->capacity < 1024 ? run->capacity * 3 / 2 +: run->capacity * 5 / 4; +if (newCapacity < min) newCapacity = min; +run->capacity = newCapacity; +assert(run->capacity >= min); +if (copy) { +rle16_t *oldruns = run->runs; +run->runs = +(rle16_t *)roaring_realloc(oldruns, run->capacity * sizeof(rle16_t)); +if (run->runs == NULL) roaring_free(oldruns); +} else { +// Jon Strabala reports that some tools complain otherwise +if (run->runs != NULL) { +roaring_free(run->runs); +} +run->runs = (rle16_t *)roaring_malloc(run->capacity * sizeof(rle16_t)); +} +// We may have run->runs == NULL. 
} -run_container_t *run_container_from_array(const array_container_t *c) { - int32_t n_runs = array_container_number_of_runs(c); - run_container_t *answer = run_container_create_given_capacity(n_runs); - int prev = -2; - int run_start = -1; - int32_t card = c->cardinality; - if (card == 0) return answer; - for (int i = 0; i < card; ++i) { - const uint16_t cur_val = c->array[i]; - if (cur_val != prev + 1) { - // new run starts; flush old one, if any - if (run_start != -1) add_run(answer, run_start, prev); - run_start = cur_val; - } - prev = c->array[i]; - } - // now prev is the last seen value - add_run(answer, run_start, prev); - // assert(run_container_cardinality(answer) == c->cardinality); - return answer; +/* copy one container into another */ +void run_container_copy(const run_container_t *src, run_container_t *dst) { +const int32_t n_runs = src->n_runs; +if (src->n_runs > dst->capacity) { +run_container_grow(dst, n_runs, false); +} +dst->n_runs = n_runs; +memcpy(dst->runs, src->runs, sizeof(rle16_t) * n_runs); } -/** - * Convert the runcontainer to either a Bitmap or an Array Container, depending - * on the cardinality. Frees the container. - * Allocates and returns new container, which caller is responsible for freeing. - * It does not free the run container. - */ +/* Compute the union of `src_1' and `src_2' and write the result to `dst' + * It is assumed that `dst' is distinct from both `src_1' and `src_2'. 
*/ +void run_container_union(const run_container_t *src_1, +const run_container_t *src_2, run_container_t *dst) { +// TODO: this could be a lot more efficient -void *convert_to_bitset_or_array_container(run_container_t *r, int32_t card, - uint8_t *resulttype) { - if (card <= DEFAULT_MAX_SIZE) { - array_container_t *answer = array_container_create_given_capacity(card); - answer->cardinality = 0; - for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) { - uint16_t run_start = r->runs[rlepos].value; - uint16_t run_end = run_start + r->runs[rlepos].length; - for (uint16_t run_value = run_start; run_value <= run_end; - ++run_value) { - answer->array[answer->cardinality++] = run_value; - } - } - assert(card == answer->cardinality); - *resulttype = ARRAY_CONTAINER_TYPE_CODE; - //run_container_free(r); - return answer; - } - bitset_container_t *answer = bitset_container_create(); - for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) { - uint16_t run_start = r->runs[rlepos].value; - bitset_set_lenrange(answer->array, run_start, r->runs[rlepos].length); - } - answer->cardinality = card; - *resulttype = BITSET_CONTAINER_TYPE_CODE; - //run_container_free(r); - return answer; +// we start out with inexpensive checks +const bool if1 = run_container_is_full(src_1); +const bool if2 = run_container_is_full(src_2); +if (if1 || if2) { +if (if1) { +run_container_copy(src_1, dst); +return; } +if (if2) { +run_container_copy(src_2, dst); +return; +} +} +const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; +if (dst->capacity < neededcapacity) +run_container_grow(dst, neededcapacity, false); +dst->n_runs = 0; +int32_t rlepos = 0; +int32_t xrlepos = 0; -/* Converts a run container to either an array or a bitset, IF it saves space. - */ -/* If a conversion occurs, the caller is responsible to free the original - * container and - * he becomes responsible to free the new one. 
*/ -void *convert_run_to_efficient_container(run_container_t *c, - uint8_t *typecode_after) { - int32_t size_as_run_container = - run_container_serialized_size_in_bytes(c->n_runs); - - int32_t size_as_bitset_container = - bitset_container_serialized_size_in_bytes(); - int32_t card = run_container_cardinality(c); - int32_t size_as_array_container = - array_container_serialized_size_in_bytes(card); - - int32_t min_size_non_run = - size_as_bitset_container < size_as_array_container - ? size_as_bitset_container - : size_as_array_container; - if (size_as_run_container <= min_size_non_run) { // no conversion - *typecode_after = RUN_CONTAINER_TYPE_CODE; - return c; - } - if (card <= DEFAULT_MAX_SIZE) { - // to array - array_container_t *answer = array_container_create_given_capacity(card); - answer->cardinality = 0; - for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) { - int run_start = c->runs[rlepos].value; - int run_end = run_start + c->runs[rlepos].length; - - for (int run_value = run_start; run_value <= run_end; ++run_value) { - answer->array[answer->cardinality++] = (uint16_t)run_value; - } - } - *typecode_after = ARRAY_CONTAINER_TYPE_CODE; - return answer; - } - - // else to bitset - bitset_container_t *answer = bitset_container_create(); +rle16_t previousrle; +if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { +previousrle = run_container_append_first(dst, src_1->runs[rlepos]); +rlepos++; +} else { +previousrle = run_container_append_first(dst, src_2->runs[xrlepos]); +xrlepos++; +} - for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) { - int start = c->runs[rlepos].value; - int end = start + c->runs[rlepos].length; - bitset_set_range(answer->array, start, end + 1); - } - answer->cardinality = card; - *typecode_after = BITSET_CONTAINER_TYPE_CODE; - return answer; +while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) { +rle16_t newrl; +if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { +newrl = src_1->runs[rlepos]; +rlepos++; +} 
else { +newrl = src_2->runs[xrlepos]; +xrlepos++; +} +run_container_append(dst, newrl, &previousrle); +} +while (xrlepos < src_2->n_runs) { +run_container_append(dst, src_2->runs[xrlepos], &previousrle); +xrlepos++; +} +while (rlepos < src_1->n_runs) { +run_container_append(dst, src_1->runs[rlepos], &previousrle); +rlepos++; +} } -// like convert_run_to_efficient_container but frees the old result if needed -void *convert_run_to_efficient_container_and_free(run_container_t *c, - uint8_t *typecode_after) { - void *answer = convert_run_to_efficient_container(c, typecode_after); - if (answer != c) run_container_free(c); - return answer; +/* Compute the union of `src_1' and `src_2' and write the result to `src_1' + */ +void run_container_union_inplace(run_container_t *src_1, +const run_container_t *src_2) { +// TODO: this could be a lot more efficient + +// we start out with inexpensive checks +const bool if1 = run_container_is_full(src_1); +const bool if2 = run_container_is_full(src_2); +if (if1 || if2) { +if (if1) { +return; +} +if (if2) { +run_container_copy(src_2, src_1); +return; +} +} +// we move the data to the end of the current array +const int32_t maxoutput = src_1->n_runs + src_2->n_runs; +const int32_t neededcapacity = maxoutput + src_1->n_runs; +if (src_1->capacity < neededcapacity) +run_container_grow(src_1, neededcapacity, true); +memmove(src_1->runs + maxoutput, src_1->runs, +src_1->n_runs * sizeof(rle16_t)); +rle16_t *inputsrc1 = src_1->runs + maxoutput; +const int32_t input1nruns = src_1->n_runs; +src_1->n_runs = 0; +int32_t rlepos = 0; +int32_t xrlepos = 0; + +rle16_t previousrle; +if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { +previousrle = run_container_append_first(src_1, inputsrc1[rlepos]); +rlepos++; +} else { +previousrle = run_container_append_first(src_1, src_2->runs[xrlepos]); +xrlepos++; +} +while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) { +rle16_t newrl; +if (inputsrc1[rlepos].value <= 
src_2->runs[xrlepos].value) { +newrl = inputsrc1[rlepos]; +rlepos++; +} else { +newrl = src_2->runs[xrlepos]; +xrlepos++; +} +run_container_append(src_1, newrl, &previousrle); +} +while (xrlepos < src_2->n_runs) { +run_container_append(src_1, src_2->runs[xrlepos], &previousrle); +xrlepos++; +} +while (rlepos < input1nruns) { +run_container_append(src_1, inputsrc1[rlepos], &previousrle); +rlepos++; +} } -/* once converted, the original container is disposed here, rather than - in roaring_array -*/ +/* Compute the symmetric difference of `src_1' and `src_2' and write the result + * to `dst' + * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ +void run_container_xor(const run_container_t *src_1, +const run_container_t *src_2, run_container_t *dst) { +// don't bother to convert xor with full range into negation +// since negation is implemented similarly -// TODO: split into run- array- and bitset- subfunctions for sanity; -// a few function calls won't really matter. +const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; +if (dst->capacity < neededcapacity) +run_container_grow(dst, neededcapacity, false); -void *convert_run_optimize(void *c, uint8_t typecode_original, - uint8_t *typecode_after) { - if (typecode_original == RUN_CONTAINER_TYPE_CODE) { - void *newc = convert_run_to_efficient_container((run_container_t *)c, - typecode_after); - if (newc != c) { - container_free(c, typecode_original); - } - return newc; - } else if (typecode_original == ARRAY_CONTAINER_TYPE_CODE) { - // it might need to be converted to a run container. 
- array_container_t *c_qua_array = (array_container_t *)c; - int32_t n_runs = array_container_number_of_runs(c_qua_array); - int32_t size_as_run_container = - run_container_serialized_size_in_bytes(n_runs); - int32_t card = array_container_cardinality(c_qua_array); - int32_t size_as_array_container = - array_container_serialized_size_in_bytes(card); - - if (size_as_run_container >= size_as_array_container) { - *typecode_after = ARRAY_CONTAINER_TYPE_CODE; - return c; - } - // else convert array to run container - run_container_t *answer = run_container_create_given_capacity(n_runs); - int prev = -2; - int run_start = -1; - - assert(card > 0); - for (int i = 0; i < card; ++i) { - uint16_t cur_val = c_qua_array->array[i]; - if (cur_val != prev + 1) { - // new run starts; flush old one, if any - if (run_start != -1) add_run(answer, run_start, prev); - run_start = cur_val; - } - prev = c_qua_array->array[i]; - } - assert(run_start >= 0); - // now prev is the last seen value - add_run(answer, run_start, prev); - *typecode_after = RUN_CONTAINER_TYPE_CODE; - array_container_free(c_qua_array); - return answer; - } else if (typecode_original == - BITSET_CONTAINER_TYPE_CODE) { // run conversions on bitset - // does bitset need conversion to run? - bitset_container_t *c_qua_bitset = (bitset_container_t *)c; - int32_t n_runs = bitset_container_number_of_runs(c_qua_bitset); - int32_t size_as_run_container = - run_container_serialized_size_in_bytes(n_runs); - int32_t size_as_bitset_container = - bitset_container_serialized_size_in_bytes(); - - if (size_as_bitset_container <= size_as_run_container) { - // no conversion needed. 
- *typecode_after = BITSET_CONTAINER_TYPE_CODE; - return c; - } - // bitset to runcontainer (ported from Java RunContainer( - // BitmapContainer bc, int nbrRuns)) - assert(n_runs > 0); // no empty bitmaps - run_container_t *answer = run_container_create_given_capacity(n_runs); - - int long_ctr = 0; - uint64_t cur_word = c_qua_bitset->array[0]; - int run_count = 0; - while (true) { - while (cur_word == UINT64_C(0) && - long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1) - cur_word = c_qua_bitset->array[++long_ctr]; - - if (cur_word == UINT64_C(0)) { - bitset_container_free(c_qua_bitset); - *typecode_after = RUN_CONTAINER_TYPE_CODE; - return answer; - } - - int local_run_start = __builtin_ctzll(cur_word); - int run_start = local_run_start + 64 * long_ctr; - uint64_t cur_word_with_1s = cur_word | (cur_word - 1); - - int run_end = 0; - while (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF) && - long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1) - cur_word_with_1s = c_qua_bitset->array[++long_ctr]; - - if (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF)) { - run_end = 64 + long_ctr * 64; // exclusive, I guess - add_run(answer, run_start, run_end - 1); - bitset_container_free(c_qua_bitset); - *typecode_after = RUN_CONTAINER_TYPE_CODE; - return answer; - } - int local_run_end = __builtin_ctzll(~cur_word_with_1s); - run_end = local_run_end + long_ctr * 64; - add_run(answer, run_start, run_end - 1); - run_count++; - cur_word = cur_word_with_1s & (cur_word_with_1s + 1); - } - return answer; - } else { - assert(false); - __builtin_unreachable(); - return NULL; - } +int32_t pos1 = 0; +int32_t pos2 = 0; +dst->n_runs = 0; + +while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) { +if (src_1->runs[pos1].value <= src_2->runs[pos2].value) { +run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, +src_1->runs[pos1].length); +pos1++; +} else { +run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, +src_2->runs[pos2].length); +pos2++; +} +} +while (pos1 < 
src_1->n_runs) { +run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, +src_1->runs[pos1].length); +pos1++; } -bitset_container_t *bitset_container_from_run_range(const run_container_t *run, - uint32_t min, uint32_t max) { - bitset_container_t *bitset = bitset_container_create(); - int32_t union_cardinality = 0; - for (int32_t i = 0; i < run->n_runs; ++i) { - uint32_t rle_min = run->runs[i].value; - uint32_t rle_max = rle_min + run->runs[i].length; - bitset_set_lenrange(bitset->array, rle_min, rle_max - rle_min); - union_cardinality += run->runs[i].length + 1; - } - union_cardinality += max - min + 1; - union_cardinality -= bitset_lenrange_cardinality(bitset->array, min, max-min); - bitset_set_lenrange(bitset->array, min, max - min); - bitset->cardinality = union_cardinality; - return bitset; +while (pos2 < src_2->n_runs) { +run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, +src_2->runs[pos2].length); +pos2++; +} } -/* end file src/containers/convert.c */ -/* begin file src/containers/mixed_andnot.c */ -/* - * mixed_andnot.c. More methods since operation is not symmetric, - * except no "wide" andnot , so no lazy options motivated. - */ -#include -#include +/* Compute the intersection of src_1 and src_2 and write the result to + * dst. It is assumed that dst is distinct from both src_1 and src_2. 
*/ +void run_container_intersection(const run_container_t *src_1, +const run_container_t *src_2, +run_container_t *dst) { +const bool if1 = run_container_is_full(src_1); +const bool if2 = run_container_is_full(src_2); +if (if1 || if2) { +if (if1) { +run_container_copy(src_2, dst); +return; +} +if (if2) { +run_container_copy(src_1, dst); +return; +} +} +// TODO: this could be a lot more efficient, could use SIMD optimizations +const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; +if (dst->capacity < neededcapacity) +run_container_grow(dst, neededcapacity, false); +dst->n_runs = 0; +int32_t rlepos = 0; +int32_t xrlepos = 0; +int32_t start = src_1->runs[rlepos].value; +int32_t end = start + src_1->runs[rlepos].length + 1; +int32_t xstart = src_2->runs[xrlepos].value; +int32_t xend = xstart + src_2->runs[xrlepos].length + 1; +while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { +if (end <= xstart) { +++rlepos; +if (rlepos < src_1->n_runs) { +start = src_1->runs[rlepos].value; +end = start + src_1->runs[rlepos].length + 1; +} +} else if (xend <= start) { +++xrlepos; +if (xrlepos < src_2->n_runs) { +xstart = src_2->runs[xrlepos].value; +xend = xstart + src_2->runs[xrlepos].length + 1; +} +} else { // they overlap +const int32_t lateststart = start > xstart ? 
start : xstart; +int32_t earliestend; +if (end == xend) { // improbable +earliestend = end; +rlepos++; +xrlepos++; +if (rlepos < src_1->n_runs) { +start = src_1->runs[rlepos].value; +end = start + src_1->runs[rlepos].length + 1; +} +if (xrlepos < src_2->n_runs) { +xstart = src_2->runs[xrlepos].value; +xend = xstart + src_2->runs[xrlepos].length + 1; +} +} else if (end < xend) { +earliestend = end; +rlepos++; +if (rlepos < src_1->n_runs) { +start = src_1->runs[rlepos].value; +end = start + src_1->runs[rlepos].length + 1; +} + +} else { // end > xend +earliestend = xend; +xrlepos++; +if (xrlepos < src_2->n_runs) { +xstart = src_2->runs[xrlepos].value; +xend = xstart + src_2->runs[xrlepos].length + 1; +} +} +dst->runs[dst->n_runs].value = (uint16_t)lateststart; +dst->runs[dst->n_runs].length = +(uint16_t)(earliestend - lateststart - 1); +dst->n_runs++; +} +} +} +/* Compute the size of the intersection of src_1 and src_2 . */ +int run_container_intersection_cardinality(const run_container_t *src_1, +const run_container_t *src_2) { +const bool if1 = run_container_is_full(src_1); +const bool if2 = run_container_is_full(src_2); +if (if1 || if2) { +if (if1) { +return run_container_cardinality(src_2); +} +if (if2) { +return run_container_cardinality(src_1); +} +} +int answer = 0; +int32_t rlepos = 0; +int32_t xrlepos = 0; +int32_t start = src_1->runs[rlepos].value; +int32_t end = start + src_1->runs[rlepos].length + 1; +int32_t xstart = src_2->runs[xrlepos].value; +int32_t xend = xstart + src_2->runs[xrlepos].length + 1; +while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { +if (end <= xstart) { +++rlepos; +if (rlepos < src_1->n_runs) { +start = src_1->runs[rlepos].value; +end = start + src_1->runs[rlepos].length + 1; +} +} else if (xend <= start) { +++xrlepos; +if (xrlepos < src_2->n_runs) { +xstart = src_2->runs[xrlepos].value; +xend = xstart + src_2->runs[xrlepos].length + 1; +} +} else { // they overlap +const int32_t lateststart = start > xstart ? 
start : xstart; +int32_t earliestend; +if (end == xend) { // improbable +earliestend = end; +rlepos++; +xrlepos++; +if (rlepos < src_1->n_runs) { +start = src_1->runs[rlepos].value; +end = start + src_1->runs[rlepos].length + 1; +} +if (xrlepos < src_2->n_runs) { +xstart = src_2->runs[xrlepos].value; +xend = xstart + src_2->runs[xrlepos].length + 1; +} +} else if (end < xend) { +earliestend = end; +rlepos++; +if (rlepos < src_1->n_runs) { +start = src_1->runs[rlepos].value; +end = start + src_1->runs[rlepos].length + 1; +} + +} else { // end > xend +earliestend = xend; +xrlepos++; +if (xrlepos < src_2->n_runs) { +xstart = src_2->runs[xrlepos].value; +xend = xstart + src_2->runs[xrlepos].length + 1; +} +} +answer += earliestend - lateststart; +} +} +return answer; +} -/* Compute the andnot of src_1 and src_2 and write the result to - * dst, a valid array container that could be the same as dst.*/ -void array_bitset_container_andnot(const array_container_t *src_1, - const bitset_container_t *src_2, - array_container_t *dst) { - // follows Java implementation as of June 2016 - if (dst->capacity < src_1->cardinality) { - array_container_grow(dst, src_1->cardinality, false); - } - int32_t newcard = 0; - const int32_t origcard = src_1->cardinality; - for (int i = 0; i < origcard; ++i) { - uint16_t key = src_1->array[i]; - dst->array[newcard] = key; - newcard += 1 - bitset_container_contains(src_2, key); - } - dst->cardinality = newcard; +bool run_container_intersect(const run_container_t *src_1, +const run_container_t *src_2) { +const bool if1 = run_container_is_full(src_1); +const bool if2 = run_container_is_full(src_2); +if (if1 || if2) { +if (if1) { +return !run_container_empty(src_2); +} +if (if2) { +return !run_container_empty(src_1); +} +} +int32_t rlepos = 0; +int32_t xrlepos = 0; +int32_t start = src_1->runs[rlepos].value; +int32_t end = start + src_1->runs[rlepos].length + 1; +int32_t xstart = src_2->runs[xrlepos].value; +int32_t xend = xstart + 
src_2->runs[xrlepos].length + 1; +while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { +if (end <= xstart) { +++rlepos; +if (rlepos < src_1->n_runs) { +start = src_1->runs[rlepos].value; +end = start + src_1->runs[rlepos].length + 1; +} +} else if (xend <= start) { +++xrlepos; +if (xrlepos < src_2->n_runs) { +xstart = src_2->runs[xrlepos].value; +xend = xstart + src_2->runs[xrlepos].length + 1; +} +} else { // they overlap +return true; +} +} +return false; } -/* Compute the andnot of src_1 and src_2 and write the result to - * src_1 */ -void array_bitset_container_iandnot(array_container_t *src_1, - const bitset_container_t *src_2) { - array_bitset_container_andnot(src_1, src_2, src_1); -} +/* Compute the difference of src_1 and src_2 and write the result to + * dst. It is assumed that dst is distinct from both src_1 and src_2. */ +void run_container_andnot(const run_container_t *src_1, +const run_container_t *src_2, run_container_t *dst) { +// following Java implementation as of June 2016 -/* Compute the andnot of src_1 and src_2 and write the result to - * dst, which does not initially have a valid container. - * Return true for a bitset result; false for array - */ +if (dst->capacity < src_1->n_runs + src_2->n_runs) +run_container_grow(dst, src_1->n_runs + src_2->n_runs, false); -bool bitset_array_container_andnot(const bitset_container_t *src_1, - const array_container_t *src_2, void **dst) { - // Java did this directly, but we have option of asm or avx - bitset_container_t *result = bitset_container_create(); - bitset_container_copy(src_1, result); - result->cardinality = - (int32_t)bitset_clear_list(result->array, (uint64_t)result->cardinality, - src_2->array, (uint64_t)src_2->cardinality); - - // do required type conversions. 
- if (result->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(result); - bitset_container_free(result); - return false; - } - *dst = result; - return true; -} +dst->n_runs = 0; -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ +int rlepos1 = 0; +int rlepos2 = 0; +int32_t start = src_1->runs[rlepos1].value; +int32_t end = start + src_1->runs[rlepos1].length + 1; +int32_t start2 = src_2->runs[rlepos2].value; +int32_t end2 = start2 + src_2->runs[rlepos2].length + 1; -bool bitset_array_container_iandnot(bitset_container_t *src_1, - const array_container_t *src_2, - void **dst) { - *dst = src_1; - src_1->cardinality = - (int32_t)bitset_clear_list(src_1->array, (uint64_t)src_1->cardinality, - src_2->array, (uint64_t)src_2->cardinality); +while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) { +if (end <= start2) { +// output the first run +dst->runs[dst->n_runs++] = MAKE_RLE16(start, end - start - 1); +rlepos1++; +if (rlepos1 < src_1->n_runs) { +start = src_1->runs[rlepos1].value; +end = start + src_1->runs[rlepos1].length + 1; +} +} else if (end2 <= start) { +// exit the second run +rlepos2++; +if (rlepos2 < src_2->n_runs) { +start2 = src_2->runs[rlepos2].value; +end2 = start2 + src_2->runs[rlepos2].length + 1; +} +} else { +if (start < start2) { +dst->runs[dst->n_runs++] = +MAKE_RLE16(start, start2 - start - 1); +} +if (end2 < end) { +start = end2; +} else { +rlepos1++; +if (rlepos1 < src_1->n_runs) { +start = src_1->runs[rlepos1].value; +end = start + src_1->runs[rlepos1].length + 1; +} +} +} +} +if (rlepos1 < src_1->n_runs) { +dst->runs[dst->n_runs++] = MAKE_RLE16(start, end - start - 1); +rlepos1++; +if (rlepos1 < src_1->n_runs) { 
+memcpy(dst->runs + dst->n_runs, src_1->runs + rlepos1, +sizeof(rle16_t) * (src_1->n_runs - rlepos1)); +dst->n_runs += src_1->n_runs - rlepos1; +} +} +} - if (src_1->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(src_1); - bitset_container_free(src_1); - return false; // not bitset - } else - return true; +ALLOW_UNALIGNED +int run_container_to_uint32_array(void *vout, const run_container_t *cont, +uint32_t base) { +int outpos = 0; +uint32_t *out = (uint32_t *)vout; +for (int i = 0; i < cont->n_runs; ++i) { +uint32_t run_start = base + cont->runs[i].value; +uint16_t le = cont->runs[i].length; +for (int j = 0; j <= le; ++j) { +uint32_t val = run_start + j; +memcpy(out + outpos, &val, +sizeof(uint32_t)); // should be compiled as a MOV on x64 +outpos++; +} +} +return outpos; } -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. +/* + * Print this container using printf (useful for debugging). 
*/ - -bool run_bitset_container_andnot(const run_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - // follows the Java implementation as of June 2016 - int card = run_container_cardinality(src_1); - if (card <= DEFAULT_MAX_SIZE) { - // must be an array - array_container_t *answer = array_container_create_given_capacity(card); - answer->cardinality = 0; - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - for (int run_value = rle.value; run_value <= rle.value + rle.length; - ++run_value) { - if (!bitset_container_get(src_2, (uint16_t)run_value)) { - answer->array[answer->cardinality++] = (uint16_t)run_value; - } - } - } - *dst = answer; - return false; - } else { // we guess it will be a bitset, though have to check guess when - // done - bitset_container_t *answer = bitset_container_clone(src_2); - - uint32_t last_pos = 0; - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - - uint32_t start = rle.value; - uint32_t end = start + rle.length + 1; - bitset_reset_range(answer->array, last_pos, start); - bitset_flip_range(answer->array, start, end); - last_pos = end; - } - bitset_reset_range(answer->array, last_pos, (uint32_t)(1 << 16)); - - answer->cardinality = bitset_container_compute_cardinality(answer); - - if (answer->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(answer); - bitset_container_free(answer); - return false; // not bitset - } - *dst = answer; - return true; // bitset - } +void run_container_printf(const run_container_t *cont) { +for (int i = 0; i < cont->n_runs; ++i) { +uint16_t run_start = cont->runs[i].value; +uint16_t le = cont->runs[i].length; +printf("[%d,%d]", run_start, run_start + le); +} } -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). 
dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. +/* + * Print this container using printf as a comma-separated list of 32-bit + * integers starting at base. */ - -bool run_bitset_container_iandnot(run_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - // dummy implementation - bool ans = run_bitset_container_andnot(src_1, src_2, dst); - run_container_free(src_1); - return ans; +void run_container_printf_as_uint32_array(const run_container_t *cont, +uint32_t base) { +if (cont->n_runs == 0) return; +{ +uint32_t run_start = base + cont->runs[0].value; +uint16_t le = cont->runs[0].length; +printf("%u", run_start); +for (uint32_t j = 1; j <= le; ++j) printf(",%u", run_start + j); +} +for (int32_t i = 1; i < cont->n_runs; ++i) { +uint32_t run_start = base + cont->runs[i].value; +uint16_t le = cont->runs[i].length; +for (uint32_t j = 0; j <= le; ++j) printf(",%u", run_start + j); +} } -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. +/* + * Validate the container. Returns true if valid. 
*/ +bool run_container_validate(const run_container_t *run, const char **reason) { +if (run->n_runs < 0) { +*reason = "negative run count"; +return false; +} +if (run->capacity < 0) { +*reason = "negative run capacity"; +return false; +} +if (run->capacity < run->n_runs) { +*reason = "capacity less than run count"; +return false; +} -bool bitset_run_container_andnot(const bitset_container_t *src_1, - const run_container_t *src_2, void **dst) { - // follows Java implementation - bitset_container_t *result = bitset_container_create(); +if (run->n_runs == 0) { +return true; +} +if (run->runs == NULL) { +*reason = "NULL runs"; +return false; +} - bitset_container_copy(src_1, result); - for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) { - rle16_t rle = src_2->runs[rlepos]; - bitset_reset_range(result->array, rle.value, - rle.value + rle.length + UINT32_C(1)); - } - result->cardinality = bitset_container_compute_cardinality(result); +// Use uint32_t to avoid overflow issues on ranges that contain UINT16_MAX. 
+uint32_t last_end = 0; +for (int i = 0; i < run->n_runs; ++i) { +uint32_t start = run->runs[i].value; +uint32_t end = start + run->runs[i].length + 1; +if (end <= start) { +*reason = "run start + length overflow"; +return false; +} +if (end > (1<<16)) { +*reason = "run start + length too large"; +return false; +} +if (start < last_end) { +*reason = "run start less than last end"; +return false; +} +if (start == last_end && last_end != 0) { +*reason = "run start equal to last end, should have combined"; +return false; +} +last_end = end; +} +return true; +} - if (result->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(result); - bitset_container_free(result); - return false; // not bitset - } - *dst = result; - return true; // bitset +int32_t run_container_write(const run_container_t *container, char *buf) { +uint16_t cast_16 = container->n_runs; +memcpy(buf, &cast_16, sizeof(uint16_t)); +memcpy(buf + sizeof(uint16_t), container->runs, +container->n_runs * sizeof(rle16_t)); +return run_container_size_in_bytes(container); } -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. 
- * Returns true iff dst is a bitset */ +int32_t run_container_read(int32_t cardinality, run_container_t *container, +const char *buf) { +(void)cardinality; +uint16_t cast_16; +memcpy(&cast_16, buf, sizeof(uint16_t)); +container->n_runs = cast_16; +if (container->n_runs > container->capacity) +run_container_grow(container, container->n_runs, false); +if(container->n_runs > 0) { +memcpy(container->runs, buf + sizeof(uint16_t), +container->n_runs * sizeof(rle16_t)); +} +return run_container_size_in_bytes(container); +} -bool bitset_run_container_iandnot(bitset_container_t *src_1, - const run_container_t *src_2, void **dst) { - *dst = src_1; +bool run_container_iterate(const run_container_t *cont, uint32_t base, +roaring_iterator iterator, void *ptr) { +for (int i = 0; i < cont->n_runs; ++i) { +uint32_t run_start = base + cont->runs[i].value; +uint16_t le = cont->runs[i].length; - for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) { - rle16_t rle = src_2->runs[rlepos]; - bitset_reset_range(src_1->array, rle.value, - rle.value + rle.length + UINT32_C(1)); - } - src_1->cardinality = bitset_container_compute_cardinality(src_1); +for (int j = 0; j <= le; ++j) +if (!iterator(run_start + j, ptr)) return false; +} +return true; +} + +bool run_container_iterate64(const run_container_t *cont, uint32_t base, +roaring_iterator64 iterator, uint64_t high_bits, +void *ptr) { +for (int i = 0; i < cont->n_runs; ++i) { +uint32_t run_start = base + cont->runs[i].value; +uint16_t le = cont->runs[i].length; + +for (int j = 0; j <= le; ++j) +if (!iterator(high_bits | (uint64_t)(run_start + j), ptr)) +return false; +} +return true; +} - if (src_1->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(src_1); - bitset_container_free(src_1); - return false; // not bitset - } else - return true; +bool run_container_is_subset(const run_container_t *container1, +const run_container_t *container2) { +int i1 = 0, i2 = 0; +while (i1 < container1->n_runs && i2 < 
container2->n_runs) { +int start1 = container1->runs[i1].value; +int stop1 = start1 + container1->runs[i1].length; +int start2 = container2->runs[i2].value; +int stop2 = start2 + container2->runs[i2].length; +if (start1 < start2) { +return false; +} else { // start1 >= start2 +if (stop1 < stop2) { +i1++; +} else if (stop1 == stop2) { +i1++; +i2++; +} else { // stop1 > stop2 +i2++; +} +} +} +if (i1 == container1->n_runs) { +return true; +} else { +return false; +} } -/* helper. a_out must be a valid array container with adequate capacity. - * Returns the cardinality of the output container. Partly Based on Java - * implementation Util.unsignedDifference. - * - * TODO: Util.unsignedDifference does not use advanceUntil. Is it cheaper - * to avoid advanceUntil? - */ +// TODO: write smart_append_exclusive version to match the overloaded 1 param +// Java version (or is it even used?) -static int run_array_array_subtract(const run_container_t *r, - const array_container_t *a_in, - array_container_t *a_out) { - int out_card = 0; - int32_t in_array_pos = - -1; // since advanceUntil always assumes we start the search AFTER this - - for (int rlepos = 0; rlepos < r->n_runs; rlepos++) { - int32_t start = r->runs[rlepos].value; - int32_t end = start + r->runs[rlepos].length + 1; - - in_array_pos = advanceUntil(a_in->array, in_array_pos, - a_in->cardinality, (uint16_t)start); - - if (in_array_pos >= a_in->cardinality) { // run has no items subtracted - for (int32_t i = start; i < end; ++i) - a_out->array[out_card++] = (uint16_t)i; - } else { - uint16_t next_nonincluded = a_in->array[in_array_pos]; - if (next_nonincluded >= end) { - // another case when run goes unaltered - for (int32_t i = start; i < end; ++i) - a_out->array[out_card++] = (uint16_t)i; - in_array_pos--; // ensure we see this item again if necessary - } else { - for (int32_t i = start; i < end; ++i) - if (i != next_nonincluded) - a_out->array[out_card++] = (uint16_t)i; - else // 0 should ensure we don't match - 
next_nonincluded = - (in_array_pos + 1 >= a_in->cardinality) - ? 0 - : a_in->array[++in_array_pos]; - in_array_pos--; // see again - } - } - } - return out_card; +// follows the Java implementation closely +// length is the rle-value. Ie, run [10,12) uses a length value 1. +void run_container_smart_append_exclusive(run_container_t *src, +const uint16_t start, +const uint16_t length) { +int old_end; +rle16_t *last_run = src->n_runs ? src->runs + (src->n_runs - 1) : NULL; +rle16_t *appended_last_run = src->runs + src->n_runs; + +if (!src->n_runs || +(start > (old_end = last_run->value + last_run->length + 1))) { +*appended_last_run = MAKE_RLE16(start, length); +src->n_runs++; +return; +} +if (old_end == start) { +// we merge +last_run->length += (length + 1); +return; +} +int new_end = start + length + 1; + +if (start == last_run->value) { +// wipe out previous +if (new_end < old_end) { +*last_run = MAKE_RLE16(new_end, old_end - new_end - 1); +return; +} else if (new_end > old_end) { +*last_run = MAKE_RLE16(old_end, new_end - old_end - 1); +return; +} else { +src->n_runs--; +return; +} +} +last_run->length = start - last_run->value - 1; +if (new_end < old_end) { +*appended_last_run = MAKE_RLE16(new_end, old_end - new_end - 1); +src->n_runs++; +} else if (new_end > old_end) { +*appended_last_run = MAKE_RLE16(old_end, new_end - old_end - 1); +src->n_runs++; +} } -/* dst does not indicate a valid container initially. Eventually it - * can become any type of container. 
- */ - -int run_array_container_andnot(const run_container_t *src_1, - const array_container_t *src_2, void **dst) { - // follows the Java impl as of June 2016 - - int card = run_container_cardinality(src_1); - const int arbitrary_threshold = 32; - - if (card <= arbitrary_threshold) { - if (src_2->cardinality == 0) { - *dst = run_container_clone(src_1); - return RUN_CONTAINER_TYPE_CODE; - } - // Java's "lazyandNot.toEfficientContainer" thing - run_container_t *answer = run_container_create_given_capacity( - card + array_container_cardinality(src_2)); - - int rlepos = 0; - int xrlepos = 0; // "x" is src_2 - rle16_t rle = src_1->runs[rlepos]; - int32_t start = rle.value; - int32_t end = start + rle.length + 1; - int32_t xstart = src_2->array[xrlepos]; - - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->cardinality)) { - if (end <= xstart) { - // output the first run - answer->runs[answer->n_runs++] = - (rle16_t){.value = (uint16_t)start, - .length = (uint16_t)(end - start - 1)}; - rlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xstart + 1 <= start) { - // exit the second run - xrlepos++; - if (xrlepos < src_2->cardinality) { - xstart = src_2->array[xrlepos]; - } - } else { - if (start < xstart) { - answer->runs[answer->n_runs++] = - (rle16_t){.value = (uint16_t)start, - .length = (uint16_t)(xstart - start - 1)}; - } - if (xstart + 1 < end) { - start = xstart + 1; - } else { - rlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } - } - } - if (rlepos < src_1->n_runs) { - answer->runs[answer->n_runs++] = - (rle16_t){.value = (uint16_t)start, - .length = (uint16_t)(end - start - 1)}; - rlepos++; - if (rlepos < src_1->n_runs) { - memcpy(answer->runs + answer->n_runs, src_1->runs + rlepos, - (src_1->n_runs - rlepos) * sizeof(rle16_t)); - answer->n_runs += (src_1->n_runs - rlepos); - } - } - 
uint8_t return_type; - *dst = convert_run_to_efficient_container(answer, &return_type); - if (answer != *dst) run_container_free(answer); - return return_type; - } - // else it's a bitmap or array - - if (card <= DEFAULT_MAX_SIZE) { - array_container_t *ac = array_container_create_given_capacity(card); - // nb Java code used a generic iterator-based merge to compute - // difference - ac->cardinality = run_array_array_subtract(src_1, src_2, ac); - *dst = ac; - return ARRAY_CONTAINER_TYPE_CODE; - } - bitset_container_t *ans = bitset_container_from_run(src_1); - bool result_is_bitset = bitset_array_container_iandnot(ans, src_2, dst); - return (result_is_bitset ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE); +bool run_container_select(const run_container_t *container, +uint32_t *start_rank, uint32_t rank, +uint32_t *element) { +for (int i = 0; i < container->n_runs; i++) { +uint16_t length = container->runs[i].length; +if (rank <= *start_rank + length) { +uint16_t value = container->runs[i].value; +*element = value + rank - (*start_rank); +return true; +} else +*start_rank += length + 1; +} +return false; } -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. 
- * Returns true iff dst is a bitset */ +int run_container_rank(const run_container_t *container, uint16_t x) { +int sum = 0; +uint32_t x32 = x; +for (int i = 0; i < container->n_runs; i++) { +uint32_t startpoint = container->runs[i].value; +uint32_t length = container->runs[i].length; +uint32_t endpoint = length + startpoint; +if (x <= endpoint) { +if (x < startpoint) break; +return sum + (x32 - startpoint) + 1; +} else { +sum += length + 1; +} +} +return sum; +} -int run_array_container_iandnot(run_container_t *src_1, - const array_container_t *src_2, void **dst) { - // dummy implementation same as June 2016 Java - int ans = run_array_container_andnot(src_1, src_2, dst); - run_container_free(src_1); - return ans; +int run_container_get_index(const run_container_t *container, uint16_t x) { +if (run_container_contains(container, x)) { +int sum = 0; +uint32_t x32 = x; +for (int i = 0; i < container->n_runs; i++) { +uint32_t startpoint = container->runs[i].value; +uint32_t length = container->runs[i].length; +uint32_t endpoint = length + startpoint; +if (x <= endpoint) { +if (x < startpoint) break; +return sum + (x32 - startpoint); +} else { +sum += length + 1; +} +} +return sum - 1; +} else { +return -1; +} } -/* dst must be a valid array container, allowed to be src_1 */ +#if defined(CROARING_IS_X64) && CROARING_COMPILER_SUPPORTS_AVX512 -void array_run_container_andnot(const array_container_t *src_1, - const run_container_t *src_2, - array_container_t *dst) { - // basically following Java impl as of June 2016 - if (src_1->cardinality > dst->capacity) { - array_container_grow(dst, src_1->cardinality, false); - } +CROARING_TARGET_AVX512 +ALLOW_UNALIGNED +/* Get the cardinality of `run'. Requires an actual computation. 
*/ +static inline int _avx512_run_container_cardinality(const run_container_t *run) { +const int32_t n_runs = run->n_runs; +const rle16_t *runs = run->runs; - if (src_2->n_runs == 0) { - memmove(dst->array, src_1->array, - sizeof(uint16_t) * src_1->cardinality); - dst->cardinality = src_1->cardinality; - return; - } - int32_t run_start = src_2->runs[0].value; - int32_t run_end = run_start + src_2->runs[0].length; - int which_run = 0; - - uint16_t val = 0; - int dest_card = 0; - for (int i = 0; i < src_1->cardinality; ++i) { - val = src_1->array[i]; - if (val < run_start) - dst->array[dest_card++] = val; - else if (val <= run_end) { - ; // omitted item - } else { - do { - if (which_run + 1 < src_2->n_runs) { - ++which_run; - run_start = src_2->runs[which_run].value; - run_end = run_start + src_2->runs[which_run].length; - - } else - run_start = run_end = (1 << 16) + 1; - } while (val > run_end); - --i; - } - } - dst->cardinality = dest_card; +/* by initializing with n_runs, we omit counting the +1 for each pair. */ +int sum = n_runs; +int32_t k = 0; +const int32_t step = sizeof(__m512i) / sizeof(rle16_t); +if (n_runs > step) { +__m512i total = _mm512_setzero_si512(); +for (; k + step <= n_runs; k += step) { +__m512i ymm1 = _mm512_loadu_si512((const __m512i *)(runs + k)); +__m512i justlengths = _mm512_srli_epi32(ymm1, 16); +total = _mm512_add_epi32(total, justlengths); } -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ +__m256i lo = _mm512_extracti32x8_epi32(total, 0); +__m256i hi = _mm512_extracti32x8_epi32(total, 1); -void array_run_container_iandnot(array_container_t *src_1, - const run_container_t *src_2) { - array_run_container_andnot(src_1, src_2, src_1); -} +// a store might be faster than extract? 
+uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)]; +_mm256_storeu_si256((__m256i *)buffer, lo); +sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) + +(buffer[4] + buffer[5]) + (buffer[6] + buffer[7]); -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ +_mm256_storeu_si256((__m256i *)buffer, hi); +sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) + +(buffer[4] + buffer[5]) + (buffer[6] + buffer[7]); -int run_run_container_andnot(const run_container_t *src_1, - const run_container_t *src_2, void **dst) { - run_container_t *ans = run_container_create(); - run_container_andnot(src_1, src_2, ans); - uint8_t typecode_after; - *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after); - return typecode_after; +} +for (; k < n_runs; ++k) { +sum += runs[k].length; } -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -int run_run_container_iandnot(run_container_t *src_1, - const run_container_t *src_2, void **dst) { - // following Java impl as of June 2016 (dummy) - int ans = run_run_container_andnot(src_1, src_2, dst); - run_container_free(src_1); - return ans; +return sum; } -/* - * dst is a valid array container and may be the same as src_1 - */ +CROARING_UNTARGET_AVX512 -void array_array_container_andnot(const array_container_t *src_1, - const array_container_t *src_2, - array_container_t *dst) { - array_container_andnot(src_1, src_2, dst); +CROARING_TARGET_AVX2 +ALLOW_UNALIGNED +/* Get the cardinality of `run'. Requires an actual computation. 
*/ +static inline int _avx2_run_container_cardinality(const run_container_t *run) { +const int32_t n_runs = run->n_runs; +const rle16_t *runs = run->runs; + +/* by initializing with n_runs, we omit counting the +1 for each pair. */ +int sum = n_runs; +int32_t k = 0; +const int32_t step = sizeof(__m256i) / sizeof(rle16_t); +if (n_runs > step) { +__m256i total = _mm256_setzero_si256(); +for (; k + step <= n_runs; k += step) { +__m256i ymm1 = _mm256_lddqu_si256((const __m256i *)(runs + k)); +__m256i justlengths = _mm256_srli_epi32(ymm1, 16); +total = _mm256_add_epi32(total, justlengths); +} +// a store might be faster than extract? +uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)]; +_mm256_storeu_si256((__m256i *)buffer, total); +sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) + +(buffer[4] + buffer[5]) + (buffer[6] + buffer[7]); +} +for (; k < n_runs; ++k) { +sum += runs[k].length; } -/* inplace array-array andnot will always be able to reuse the space of - * src_1 */ -void array_array_container_iandnot(array_container_t *src_1, - const array_container_t *src_2) { - array_container_andnot(src_1, src_2, src_1); +return sum; } -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). Return value is - * "dst is a bitset" - */ +CROARING_UNTARGET_AVX2 -bool bitset_bitset_container_andnot(const bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst) { - bitset_container_t *ans = bitset_container_create(); - int card = bitset_container_andnot(src_1, src_2, ans); - if (card <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(ans); - bitset_container_free(ans); - return false; // not bitset - } else { - *dst = ans; - return true; - } +/* Get the cardinality of `run'. Requires an actual computation. 
*/ +static inline int _scalar_run_container_cardinality(const run_container_t *run) { +const int32_t n_runs = run->n_runs; +const rle16_t *runs = run->runs; + +/* by initializing with n_runs, we omit counting the +1 for each pair. */ +int sum = n_runs; +for (int k = 0; k < n_runs; ++k) { +sum += runs[k].length; } -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ +return sum; +} -bool bitset_bitset_container_iandnot(bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst) { - int card = bitset_container_andnot(src_1, src_2, src_1); - if (card <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(src_1); - bitset_container_free(src_1); - return false; // not bitset - } else { - *dst = src_1; - return true; - } +int run_container_cardinality(const run_container_t *run) { +#if CROARING_COMPILER_SUPPORTS_AVX512 +if( croaring_hardware_support() & ROARING_SUPPORTS_AVX512 ) { +return _avx512_run_container_cardinality(run); } -/* end file src/containers/mixed_andnot.c */ -/* begin file src/containers/mixed_equal.c */ +else +#endif +if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) { +return _avx2_run_container_cardinality(run); +} else { +return _scalar_run_container_cardinality(run); +} +} +#else -bool array_container_equal_bitset(const array_container_t* container1, - const bitset_container_t* container2) { - if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { - if (container2->cardinality != container1->cardinality) { - return false; - } - } - int32_t pos = 0; - for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { - uint64_t w = container2->array[i]; - while (w != 0) { - uint64_t t = w & (~w + 1); - uint16_t r = i * 64 + 
__builtin_ctzll(w); - if (pos >= container1->cardinality) { - return false; - } - if (container1->array[pos] != r) { - return false; - } - ++pos; - w ^= t; - } - } - return (pos == container1->cardinality); +/* Get the cardinality of `run'. Requires an actual computation. */ +int run_container_cardinality(const run_container_t *run) { +const int32_t n_runs = run->n_runs; +const rle16_t *runs = run->runs; + +/* by initializing with n_runs, we omit counting the +1 for each pair. */ +int sum = n_runs; +for (int k = 0; k < n_runs; ++k) { +sum += runs[k].length; } -bool run_container_equals_array(const run_container_t* container1, - const array_container_t* container2) { - if (run_container_cardinality(container1) != container2->cardinality) - return false; - int32_t pos = 0; - for (int i = 0; i < container1->n_runs; ++i) { - const uint32_t run_start = container1->runs[i].value; - const uint32_t le = container1->runs[i].length; - - if (container2->array[pos] != run_start) { - return false; - } - - if (container2->array[pos + le] != run_start + le) { - return false; - } - - pos += le + 1; - } - return true; +return sum; } +#endif -bool run_container_equals_bitset(const run_container_t* container1, - const bitset_container_t* container2) { - - int run_card = run_container_cardinality(container1); - int bitset_card = (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) ? 
- container2->cardinality : - bitset_container_compute_cardinality(container2); - if (bitset_card != run_card) { - return false; - } - for (int32_t i = 0; i < container1->n_runs; i++) { - uint32_t begin = container1->runs[i].value; - if (container1->runs[i].length) { - uint32_t end = begin + container1->runs[i].length + 1; - if (!bitset_container_contains_range(container2, begin, end)) { - return false; - } - } else { - if (!bitset_container_contains(container2, begin)) { - return false; - } - } - } +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/containers/run.c */ +/* begin file src/isadetection.c */ + +/* From +https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h +Highly modified. + +Copyright (c) 2016- Facebook, Inc (Adam Paszke) +Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU (Clement Farabet) +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, +Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute +(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, +Samy Bengio, Johnny Mariethoz) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. 
Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories +America and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ - return true; -} -/* end file src/containers/mixed_equal.c */ -/* begin file src/containers/mixed_intersection.c */ -/* - * mixed_intersection.c - * - */ +#include +#include +#include +// We need portability.h to be included first, see +// https://github.com/RoaringBitmap/CRoaring/issues/394 +#if CROARING_REGULAR_VISUAL_STUDIO +#include +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) +#include +#endif // CROARING_REGULAR_VISUAL_STUDIO + +#if CROARING_IS_X64 +#ifndef CROARING_COMPILER_SUPPORTS_AVX512 +#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." +#endif // CROARING_COMPILER_SUPPORTS_AVX512 +#endif -/* Compute the intersection of src_1 and src_2 and write the result to - * dst. 
*/ -void array_bitset_container_intersection(const array_container_t *src_1, - const bitset_container_t *src_2, - array_container_t *dst) { - if (dst->capacity < src_1->cardinality) { - array_container_grow(dst, src_1->cardinality, false); - } - int32_t newcard = 0; // dst could be src_1 - const int32_t origcard = src_1->cardinality; - for (int i = 0; i < origcard; ++i) { - uint16_t key = src_1->array[i]; - // this branchless approach is much faster... - dst->array[newcard] = key; - newcard += bitset_container_contains(src_2, key); - /** - * we could do it this way instead... - * if (bitset_container_contains(src_2, key)) { - * dst->array[newcard++] = key; - * } - * but if the result is unpredictible, the processor generates - * many mispredicted branches. - * Difference can be huge (from 3 cycles when predictible all the way - * to 16 cycles when unpredictible. - * See - * https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/extra/bitset/c/arraybitsetintersection.c - */ - } - dst->cardinality = newcard; -} +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif +enum croaring_instruction_set { +CROARING_DEFAULT = 0x0, +CROARING_NEON = 0x1, +CROARING_AVX2 = 0x4, +CROARING_SSE42 = 0x8, +CROARING_PCLMULQDQ = 0x10, +CROARING_BMI1 = 0x20, +CROARING_BMI2 = 0x40, +CROARING_ALTIVEC = 0x80, +CROARING_AVX512F = 0x100, +CROARING_AVX512DQ = 0x200, +CROARING_AVX512BW = 0x400, +CROARING_AVX512VBMI2 = 0x800, +CROARING_AVX512BITALG = 0x1000, +CROARING_AVX512VPOPCNTDQ = 0x2000, +CROARING_UNINITIALIZED = 0x8000 +}; -/* Compute the size of the intersection of src_1 and src_2. 
*/ -int array_bitset_container_intersection_cardinality( - const array_container_t *src_1, const bitset_container_t *src_2) { - int32_t newcard = 0; - const int32_t origcard = src_1->cardinality; - for (int i = 0; i < origcard; ++i) { - uint16_t key = src_1->array[i]; - newcard += bitset_container_contains(src_2, key); - } - return newcard; +#if CROARING_COMPILER_SUPPORTS_AVX512 +unsigned int CROARING_AVX512_REQUIRED = (CROARING_AVX512F | CROARING_AVX512DQ | CROARING_AVX512BW | CROARING_AVX512VBMI2 | CROARING_AVX512BITALG | CROARING_AVX512VPOPCNTDQ); +#endif + +#if defined(__x86_64__) || defined(_M_AMD64) // x64 + + +static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, +uint32_t *edx) { +#if CROARING_REGULAR_VISUAL_STUDIO +int cpu_info[4]; +__cpuidex(cpu_info, *eax, *ecx); +*eax = cpu_info[0]; +*ebx = cpu_info[1]; +*ecx = cpu_info[2]; +*edx = cpu_info[3]; +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) +uint32_t level = *eax; +__get_cpuid(level, eax, ebx, ecx, edx); +#else +uint32_t a = *eax, b, c = *ecx, d; +__asm__("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d)); +*eax = a; +*ebx = b; +*ecx = c; +*edx = d; +#endif } -bool array_bitset_container_intersect(const array_container_t *src_1, - const bitset_container_t *src_2) { - const int32_t origcard = src_1->cardinality; - for (int i = 0; i < origcard; ++i) { - uint16_t key = src_1->array[i]; - if(bitset_container_contains(src_2, key)) return true; - } - return false; +static inline uint64_t xgetbv(void) { +#if defined(_MSC_VER) +return _xgetbv(0); +#else +uint32_t xcr0_lo, xcr0_hi; +__asm__("xgetbv\n\t" : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0)); +return xcr0_lo | ((uint64_t)xcr0_hi << 32); +#endif } -/* Compute the intersection of src_1 and src_2 and write the result to - * dst. It is allowed for dst to be equal to src_1. We assume that dst is a - * valid container. 
*/ -void array_run_container_intersection(const array_container_t *src_1, - const run_container_t *src_2, - array_container_t *dst) { - if (run_container_is_full(src_2)) { - if (dst != src_1) array_container_copy(src_1, dst); - return; - } - if (dst->capacity < src_1->cardinality) { - array_container_grow(dst, src_1->cardinality, false); - } - if (src_2->n_runs == 0) { - return; - } - int32_t rlepos = 0; - int32_t arraypos = 0; - rle16_t rle = src_2->runs[rlepos]; - int32_t newcard = 0; - while (arraypos < src_1->cardinality) { - const uint16_t arrayval = src_1->array[arraypos]; - while (rle.value + rle.length < - arrayval) { // this will frequently be false - ++rlepos; - if (rlepos == src_2->n_runs) { - dst->cardinality = newcard; - return; // we are done - } - rle = src_2->runs[rlepos]; - } - if (rle.value > arrayval) { - arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, - rle.value); - } else { - dst->array[newcard] = arrayval; - newcard++; - arraypos++; - } - } - dst->cardinality = newcard; -} +/** + * This is a relatively expensive function but it will get called at most + * *once* per compilation units. Normally, the CRoaring library is built + * as one compilation unit. 
+ */ +static inline uint32_t dynamic_croaring_detect_supported_architectures(void) { +uint32_t eax, ebx, ecx, edx; +uint32_t host_isa = 0x0; +// Can be found on Intel ISA Reference for CPUID +static uint32_t cpuid_avx2_bit = 1 << 5; ///< @private Bit 5 of EBX for EAX=0x7 +static uint32_t cpuid_bmi1_bit = 1 << 3; ///< @private bit 3 of EBX for EAX=0x7 +static uint32_t cpuid_bmi2_bit = 1 << 8; ///< @private bit 8 of EBX for EAX=0x7 +static uint32_t cpuid_avx512f_bit = 1 << 16; ///< @private bit 16 of EBX for EAX=0x7 +static uint32_t cpuid_avx512dq_bit = 1 << 17; ///< @private bit 17 of EBX for EAX=0x7 +static uint32_t cpuid_avx512bw_bit = 1 << 30; ///< @private bit 30 of EBX for EAX=0x7 +static uint32_t cpuid_avx512vbmi2_bit = 1 << 6; ///< @private bit 6 of ECX for EAX=0x7 +static uint32_t cpuid_avx512bitalg_bit = 1 << 12; ///< @private bit 12 of ECX for EAX=0x7 +static uint32_t cpuid_avx512vpopcntdq_bit = 1 << 14; ///< @private bit 14 of ECX for EAX=0x7 +static uint64_t cpuid_avx256_saved = 1 << 2; ///< @private bit 2 = AVX +static uint64_t cpuid_avx512_saved = 7 << 5; ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM +static uint32_t cpuid_sse42_bit = 1 << 20; ///< @private bit 20 of ECX for EAX=0x1 +static uint32_t cpuid_osxsave = (1 << 26) | (1 << 27); ///< @private bits 26+27 of ECX for EAX=0x1 +static uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit 1 of ECX for EAX=0x1 -/* Compute the intersection of src_1 and src_2 and write the result to - * *dst. If the result is true then the result is a bitset_container_t - * otherwise is a array_container_t. 
If *dst == src_2, an in-place processing - * is attempted.*/ -bool run_bitset_container_intersection(const run_container_t *src_1, - const bitset_container_t *src_2, - void **dst) { - if (run_container_is_full(src_1)) { - if (*dst != src_2) *dst = bitset_container_clone(src_2); - return true; - } - int32_t card = run_container_cardinality(src_1); - if (card <= DEFAULT_MAX_SIZE) { - // result can only be an array (assuming that we never make a - // RunContainer) - if (card > src_2->cardinality) { - card = src_2->cardinality; - } - array_container_t *answer = array_container_create_given_capacity(card); - *dst = answer; - if (*dst == NULL) { - return false; - } - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - uint32_t endofrun = (uint32_t)rle.value + rle.length; - for (uint32_t runValue = rle.value; runValue <= endofrun; - ++runValue) { - answer->array[answer->cardinality] = (uint16_t)runValue; - answer->cardinality += - bitset_container_contains(src_2, runValue); - } - } - return false; - } - if (*dst == src_2) { // we attempt in-place - bitset_container_t *answer = (bitset_container_t *)*dst; - uint32_t start = 0; - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - const rle16_t rle = src_1->runs[rlepos]; - uint32_t end = rle.value; - bitset_reset_range(src_2->array, start, end); - - start = end + rle.length + 1; - } - bitset_reset_range(src_2->array, start, UINT32_C(1) << 16); - answer->cardinality = bitset_container_compute_cardinality(answer); - if (src_2->cardinality > DEFAULT_MAX_SIZE) { - return true; - } else { - array_container_t *newanswer = array_container_from_bitset(src_2); - if (newanswer == NULL) { - *dst = NULL; - return false; - } - *dst = newanswer; - return false; - } - } else { // no inplace - // we expect the answer to be a bitmap (if we are lucky) - bitset_container_t *answer = bitset_container_clone(src_2); - - *dst = answer; - if (answer == NULL) { - return true; - } - uint32_t 
start = 0; - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - const rle16_t rle = src_1->runs[rlepos]; - uint32_t end = rle.value; - bitset_reset_range(answer->array, start, end); - start = end + rle.length + 1; - } - bitset_reset_range(answer->array, start, UINT32_C(1) << 16); - answer->cardinality = bitset_container_compute_cardinality(answer); - - if (answer->cardinality > DEFAULT_MAX_SIZE) { - return true; - } else { - array_container_t *newanswer = array_container_from_bitset(answer); - bitset_container_free((bitset_container_t *)*dst); - if (newanswer == NULL) { - *dst = NULL; - return false; - } - *dst = newanswer; - return false; - } - } + +// EBX for EAX=0x1 +eax = 0x1; +ecx = 0x0; +cpuid(&eax, &ebx, &ecx, &edx); + +if (ecx & cpuid_sse42_bit) { +host_isa |= CROARING_SSE42; +} else { +return host_isa; // everything after is redundant } -/* Compute the size of the intersection between src_1 and src_2 . */ -int array_run_container_intersection_cardinality(const array_container_t *src_1, - const run_container_t *src_2) { - if (run_container_is_full(src_2)) { - return src_1->cardinality; - } - if (src_2->n_runs == 0) { - return 0; - } - int32_t rlepos = 0; - int32_t arraypos = 0; - rle16_t rle = src_2->runs[rlepos]; - int32_t newcard = 0; - while (arraypos < src_1->cardinality) { - const uint16_t arrayval = src_1->array[arraypos]; - while (rle.value + rle.length < - arrayval) { // this will frequently be false - ++rlepos; - if (rlepos == src_2->n_runs) { - return newcard; // we are done - } - rle = src_2->runs[rlepos]; - } - if (rle.value > arrayval) { - arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, - rle.value); - } else { - newcard++; - arraypos++; - } - } - return newcard; +if (ecx & cpuid_pclmulqdq_bit) { +host_isa |= CROARING_PCLMULQDQ; } -/* Compute the intersection between src_1 and src_2 - **/ -int run_bitset_container_intersection_cardinality( - const run_container_t *src_1, const bitset_container_t *src_2) { - if 
(run_container_is_full(src_1)) { - return bitset_container_cardinality(src_2); - } - int answer = 0; - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - answer += - bitset_lenrange_cardinality(src_2->array, rle.value, rle.length); - } - return answer; +if ((ecx & cpuid_osxsave) != cpuid_osxsave) { +return host_isa; } +// xgetbv for checking if the OS saves registers +uint64_t xcr0 = xgetbv(); -bool array_run_container_intersect(const array_container_t *src_1, - const run_container_t *src_2) { - if( run_container_is_full(src_2) ) { - return !array_container_empty(src_1); - } - if (src_2->n_runs == 0) { - return false; - } - int32_t rlepos = 0; - int32_t arraypos = 0; - rle16_t rle = src_2->runs[rlepos]; - while (arraypos < src_1->cardinality) { - const uint16_t arrayval = src_1->array[arraypos]; - while (rle.value + rle.length < - arrayval) { // this will frequently be false - ++rlepos; - if (rlepos == src_2->n_runs) { - return false; // we are done - } - rle = src_2->runs[rlepos]; - } - if (rle.value > arrayval) { - arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, - rle.value); - } else { - return true; - } - } - return false; +if ((xcr0 & cpuid_avx256_saved) == 0) { +return host_isa; } -/* Compute the intersection between src_1 and src_2 - **/ -bool run_bitset_container_intersect(const run_container_t *src_1, - const bitset_container_t *src_2) { - if( run_container_is_full(src_1) ) { - return !bitset_container_empty(src_2); - } - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - if(!bitset_lenrange_empty(src_2->array, rle.value,rle.length)) return true; - } - return false; +// ECX for EAX=0x7 +eax = 0x7; +ecx = 0x0; +cpuid(&eax, &ebx, &ecx, &edx); +if (ebx & cpuid_avx2_bit) { +host_isa |= CROARING_AVX2; } - -/* - * Compute the intersection between src_1 and src_2 and write the result - * to *dst. 
If the return function is true, the result is a bitset_container_t - * otherwise is a array_container_t. - */ -bool bitset_bitset_container_intersection(const bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst) { - const int newCardinality = bitset_container_and_justcard(src_1, src_2); - if (newCardinality > DEFAULT_MAX_SIZE) { - *dst = bitset_container_create(); - if (*dst != NULL) { - bitset_container_and_nocard(src_1, src_2, - (bitset_container_t *)*dst); - ((bitset_container_t *)*dst)->cardinality = newCardinality; - } - return true; // it is a bitset - } - *dst = array_container_create_given_capacity(newCardinality); - if (*dst != NULL) { - ((array_container_t *)*dst)->cardinality = newCardinality; - bitset_extract_intersection_setbits_uint16( - ((const bitset_container_t *)src_1)->array, - ((const bitset_container_t *)src_2)->array, - BITSET_CONTAINER_SIZE_IN_WORDS, ((array_container_t *)*dst)->array, - 0); - } - return false; // not a bitset +if (ebx & cpuid_bmi1_bit) { +host_isa |= CROARING_BMI1; } -bool bitset_bitset_container_intersection_inplace( - bitset_container_t *src_1, const bitset_container_t *src_2, void **dst) { - const int newCardinality = bitset_container_and_justcard(src_1, src_2); - if (newCardinality > DEFAULT_MAX_SIZE) { - *dst = src_1; - bitset_container_and_nocard(src_1, src_2, src_1); - ((bitset_container_t *)*dst)->cardinality = newCardinality; - return true; // it is a bitset - } - *dst = array_container_create_given_capacity(newCardinality); - if (*dst != NULL) { - ((array_container_t *)*dst)->cardinality = newCardinality; - bitset_extract_intersection_setbits_uint16( - ((const bitset_container_t *)src_1)->array, - ((const bitset_container_t *)src_2)->array, - BITSET_CONTAINER_SIZE_IN_WORDS, ((array_container_t *)*dst)->array, - 0); - } - return false; // not a bitset +if (ebx & cpuid_bmi2_bit) { +host_isa |= CROARING_BMI2; } -/* end file src/containers/mixed_intersection.c */ -/* begin file 
src/containers/mixed_negation.c */ -/* - * mixed_negation.c - * - */ - -#include -#include +if (!((xcr0 & cpuid_avx512_saved) == cpuid_avx512_saved)) { +return host_isa; +} -// TODO: make simplified and optimized negation code across -// the full range. - -/* Negation across the entire range of the container. - * Compute the negation of src and write the result - * to *dst. The complement of a - * sufficiently sparse set will always be dense and a hence a bitmap -' * We assume that dst is pre-allocated and a valid bitset container - * There can be no in-place version. - */ -void array_container_negation(const array_container_t *src, - bitset_container_t *dst) { - uint64_t card = UINT64_C(1 << 16); - bitset_container_set_all(dst); +if (ebx & cpuid_avx512f_bit) { +host_isa |= CROARING_AVX512F; +} - dst->cardinality = (int32_t)bitset_clear_list(dst->array, card, src->array, - (uint64_t)src->cardinality); +if (ebx & cpuid_avx512bw_bit) { +host_isa |= CROARING_AVX512BW; } -/* Negation across the entire range of the container - * Compute the negation of src and write the result - * to *dst. A true return value indicates a bitset result, - * otherwise the result is an array container. - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool bitset_container_negation(const bitset_container_t *src, void **dst) { - return bitset_container_negation_range(src, 0, (1 << 16), dst); +if (ebx & cpuid_avx512dq_bit) { +host_isa |= CROARING_AVX512DQ; } -/* inplace version */ -/* - * Same as bitset_container_negation except that if the output is to - * be a - * bitset_container_t, then src is modified and no allocation is made. - * If the output is to be an array_container_t, then caller is responsible - * to free the container. - * In all cases, the result is in *dst. 
- */ -bool bitset_container_negation_inplace(bitset_container_t *src, void **dst) { - return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst); +if (ecx & cpuid_avx512vbmi2_bit) { +host_isa |= CROARING_AVX512VBMI2; } -/* Negation across the entire range of container - * Compute the negation of src and write the result - * to *dst. Return values are the *_TYPECODES as defined * in containers.h - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -int run_container_negation(const run_container_t *src, void **dst) { - return run_container_negation_range(src, 0, (1 << 16), dst); +if (ecx & cpuid_avx512bitalg_bit) { +host_isa |= CROARING_AVX512BITALG; } -/* - * Same as run_container_negation except that if the output is to - * be a - * run_container_t, and has the capacity to hold the result, - * then src is modified and no allocation is made. - * In all cases, the result is in *dst. - */ -int run_container_negation_inplace(run_container_t *src, void **dst) { - return run_container_negation_range_inplace(src, 0, (1 << 16), dst); +if (ecx & cpuid_avx512vpopcntdq_bit) { +host_isa |= CROARING_AVX512VPOPCNTDQ; } -/* Negation across a range of the container. - * Compute the negation of src and write the result - * to *dst. Returns true if the result is a bitset container - * and false for an array container. *dst is not preallocated. 
- */ -bool array_container_negation_range(const array_container_t *src, - const int range_start, const int range_end, - void **dst) { - /* close port of the Java implementation */ - if (range_start >= range_end) { - *dst = array_container_clone(src); - return false; - } +return host_isa; +} - int32_t start_index = - binarySearch(src->array, src->cardinality, (uint16_t)range_start); - if (start_index < 0) start_index = -start_index - 1; - - int32_t last_index = - binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1)); - if (last_index < 0) last_index = -last_index - 2; - - const int32_t current_values_in_range = last_index - start_index + 1; - const int32_t span_to_be_flipped = range_end - range_start; - const int32_t new_values_in_range = - span_to_be_flipped - current_values_in_range; - const int32_t cardinality_change = - new_values_in_range - current_values_in_range; - const int32_t new_cardinality = src->cardinality + cardinality_change; - - if (new_cardinality > DEFAULT_MAX_SIZE) { - bitset_container_t *temp = bitset_container_from_array(src); - bitset_flip_range(temp->array, (uint32_t)range_start, - (uint32_t)range_end); - temp->cardinality = new_cardinality; - *dst = temp; - return true; - } +#endif // end SIMD extension detection code - array_container_t *arr = - array_container_create_given_capacity(new_cardinality); - *dst = (void *)arr; - if(new_cardinality == 0) { - arr->cardinality = new_cardinality; - return false; // we are done. 
- } - // copy stuff before the active area - memcpy(arr->array, src->array, start_index * sizeof(uint16_t)); - - // work on the range - int32_t out_pos = start_index, in_pos = start_index; - int32_t val_in_range = range_start; - for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) { - if ((uint16_t)val_in_range != src->array[in_pos]) { - arr->array[out_pos++] = (uint16_t)val_in_range; - } else { - ++in_pos; - } - } - for (; val_in_range < range_end; ++val_in_range) - arr->array[out_pos++] = (uint16_t)val_in_range; - // content after the active range - memcpy(arr->array + out_pos, src->array + (last_index + 1), - (src->cardinality - (last_index + 1)) * sizeof(uint16_t)); - arr->cardinality = new_cardinality; - return false; +#if defined(__x86_64__) || defined(_M_AMD64) // x64 + +#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP +static inline uint32_t croaring_detect_supported_architectures(void) { +// thread-safe as per the C++11 standard. +static uint32_t buffer = dynamic_croaring_detect_supported_architectures(); +return buffer; +} +#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C +static uint32_t croaring_detect_supported_architectures(void) { +// we use an atomic for thread safety +static _Atomic uint32_t buffer = CROARING_UNINITIALIZED; +if (buffer == CROARING_UNINITIALIZED) { +// atomicity is sufficient +buffer = dynamic_croaring_detect_supported_architectures(); +} +return buffer; +} +#else +// If we do not have atomics, we do the best we can. +static inline uint32_t croaring_detect_supported_architectures(void) { +static uint32_t buffer = CROARING_UNINITIALIZED; +if (buffer == CROARING_UNINITIALIZED) { +buffer = dynamic_croaring_detect_supported_architectures(); +} +return buffer; } +#endif // CROARING_C_ATOMIC -/* Even when the result would fit, it is unclear how to make an - * inplace version without inefficient copying. 
- */ +#ifdef ROARING_DISABLE_AVX -bool array_container_negation_range_inplace(array_container_t *src, - const int range_start, - const int range_end, void **dst) { - bool ans = array_container_negation_range(src, range_start, range_end, dst); - // TODO : try a real inplace version - array_container_free(src); - return ans; +int croaring_hardware_support(void) { +return 0; } -/* Negation across a range of the container - * Compute the negation of src and write the result - * to *dst. A true return value indicates a bitset result, - * otherwise the result is an array container. - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool bitset_container_negation_range(const bitset_container_t *src, - const int range_start, const int range_end, - void **dst) { - // TODO maybe consider density-based estimate - // and sometimes build result directly as array, with - // conversion back to bitset if wrong. Or determine - // actual result cardinality, then go directly for the known final cont. - - // keep computation using bitsets as long as possible. - bitset_container_t *t = bitset_container_clone(src); - bitset_flip_range(t->array, (uint32_t)range_start, (uint32_t)range_end); - t->cardinality = bitset_container_compute_cardinality(t); - - if (t->cardinality > DEFAULT_MAX_SIZE) { - *dst = t; - return true; - } else { - *dst = array_container_from_bitset(t); - bitset_container_free(t); - return false; - } +#elif defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VBMI2__) && defined(__AVX512BITALG__) && defined(__AVX512VPOPCNTDQ__) +int croaring_hardware_support(void) { +return ROARING_SUPPORTS_AVX2 | ROARING_SUPPORTS_AVX512; } +#elif defined(__AVX2__) -/* inplace version */ -/* - * Same as bitset_container_negation except that if the output is to - * be a - * bitset_container_t, then src is modified and no allocation is made. 
- * If the output is to be an array_container_t, then caller is responsible - * to free the container. - * In all cases, the result is in *dst. - */ -bool bitset_container_negation_range_inplace(bitset_container_t *src, - const int range_start, - const int range_end, void **dst) { - bitset_flip_range(src->array, (uint32_t)range_start, (uint32_t)range_end); - src->cardinality = bitset_container_compute_cardinality(src); - if (src->cardinality > DEFAULT_MAX_SIZE) { - *dst = src; - return true; - } - *dst = array_container_from_bitset(src); - bitset_container_free(src); - return false; +int croaring_hardware_support(void) { +static int support = 0xFFFFFFF; +if(support == 0xFFFFFFF) { +bool avx512_support = false; +#if CROARING_COMPILER_SUPPORTS_AVX512 +avx512_support = ( (croaring_detect_supported_architectures() & CROARING_AVX512_REQUIRED) +== CROARING_AVX512_REQUIRED); +#endif +support = ROARING_SUPPORTS_AVX2 | (avx512_support ? ROARING_SUPPORTS_AVX512 : 0); } +return support; +} +#else -/* Negation across a range of container - * Compute the negation of src and write the result - * to *dst. Return values are the *_TYPECODES as defined * in containers.h - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -int run_container_negation_range(const run_container_t *src, - const int range_start, const int range_end, - void **dst) { - uint8_t return_typecode; +int croaring_hardware_support(void) { +static int support = 0xFFFFFFF; +if(support == 0xFFFFFFF) { +bool has_avx2 = (croaring_detect_supported_architectures() & CROARING_AVX2) == CROARING_AVX2; +bool has_avx512 = false; +#if CROARING_COMPILER_SUPPORTS_AVX512 +has_avx512 = (croaring_detect_supported_architectures() & CROARING_AVX512_REQUIRED) == CROARING_AVX512_REQUIRED; +#endif // CROARING_COMPILER_SUPPORTS_AVX512 +support = (has_avx2 ? ROARING_SUPPORTS_AVX2 : 0) | (has_avx512 ? 
ROARING_SUPPORTS_AVX512 : 0); +} +return support; +} +#endif - // follows the Java implementation - if (range_end <= range_start) { - *dst = run_container_clone(src); - return RUN_CONTAINER_TYPE_CODE; - } +#endif // defined(__x86_64__) || defined(_M_AMD64) // x64 +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/isadetection.c */ +/* begin file src/memory.c */ +#include - run_container_t *ans = run_container_create_given_capacity( - src->n_runs + 1); // src->n_runs + 1); - int k = 0; - for (; k < src->n_runs && src->runs[k].value < range_start; ++k) { - ans->runs[k] = src->runs[k]; - ans->n_runs++; - } +// without the following, we get lots of warnings about posix_memalign +#ifndef __cplusplus +extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size); +#endif //__cplusplus // C++ does not have a well defined signature + +// portable version of posix_memalign +static void *roaring_bitmap_aligned_malloc(size_t alignment, size_t size) { +void *p; +#ifdef _MSC_VER +p = _aligned_malloc(size, alignment); +#elif defined(__MINGW32__) || defined(__MINGW64__) +p = __mingw_aligned_malloc(size, alignment); +#else +// somehow, if this is used before including "x86intrin.h", it creates an +// implicit defined warning. 
+if (posix_memalign(&p, alignment, size) != 0) return NULL; +#endif +return p; +} - run_container_smart_append_exclusive( - ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); +static void roaring_bitmap_aligned_free(void *memblock) { +#ifdef _MSC_VER +_aligned_free(memblock); +#elif defined(__MINGW32__) || defined(__MINGW64__) +__mingw_aligned_free(memblock); +#else +free(memblock); +#endif +} - for (; k < src->n_runs; ++k) { - run_container_smart_append_exclusive(ans, src->runs[k].value, - src->runs[k].length); - } +static roaring_memory_t global_memory_hook = { +.malloc = malloc, +.realloc = realloc, +.calloc = calloc, +.free = free, +.aligned_malloc = roaring_bitmap_aligned_malloc, +.aligned_free = roaring_bitmap_aligned_free, +}; - *dst = convert_run_to_efficient_container(ans, &return_typecode); - if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(ans); +void roaring_init_memory_hook(roaring_memory_t memory_hook) { +global_memory_hook = memory_hook; +} - return return_typecode; +void* roaring_malloc(size_t n) { +return global_memory_hook.malloc(n); } -/* - * Same as run_container_negation except that if the output is to - * be a - * run_container_t, and has the capacity to hold the result, - * then src is modified and no allocation is made. - * In all cases, the result is in *dst. - */ -int run_container_negation_range_inplace(run_container_t *src, - const int range_start, - const int range_end, void **dst) { - uint8_t return_typecode; +void* roaring_realloc(void* p, size_t new_sz) { +return global_memory_hook.realloc(p, new_sz); +} - if (range_end <= range_start) { - *dst = src; - return RUN_CONTAINER_TYPE_CODE; - } +void* roaring_calloc(size_t n_elements, size_t element_size) { +return global_memory_hook.calloc(n_elements, element_size); +} - // TODO: efficient special case when range is 0 to 65535 inclusive - - if (src->capacity == src->n_runs) { - // no excess room. 
More checking to see if result can fit - bool last_val_before_range = false; - bool first_val_in_range = false; - bool last_val_in_range = false; - bool first_val_past_range = false; - - if (range_start > 0) - last_val_before_range = - run_container_contains(src, (uint16_t)(range_start - 1)); - first_val_in_range = run_container_contains(src, (uint16_t)range_start); - - if (last_val_before_range == first_val_in_range) { - last_val_in_range = - run_container_contains(src, (uint16_t)(range_end - 1)); - if (range_end != 0x10000) - first_val_past_range = - run_container_contains(src, (uint16_t)range_end); - - if (last_val_in_range == - first_val_past_range) { // no space for inplace - int ans = run_container_negation_range(src, range_start, - range_end, dst); - run_container_free(src); - return ans; - } - } - } - // all other cases: result will fit +void roaring_free(void* p) { +global_memory_hook.free(p); +} - run_container_t *ans = src; - int my_nbr_runs = src->n_runs; +void* roaring_aligned_malloc(size_t alignment, size_t size) { +return global_memory_hook.aligned_malloc(alignment, size); +} - ans->n_runs = 0; - int k = 0; - for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) { - // ans->runs[k] = src->runs[k]; (would be self-copy) - ans->n_runs++; - } +void roaring_aligned_free(void* p) { +global_memory_hook.aligned_free(p); +} +/* end file src/memory.c */ +/* begin file src/roaring.c */ +#include +#include +#include +#include +#include +#include - // as with Java implementation, use locals to give self a buffer of depth 1 - rle16_t buffered = (rle16_t){.value = (uint16_t)0, .length = (uint16_t)0}; - rle16_t next = buffered; - if (k < my_nbr_runs) buffered = src->runs[k]; - run_container_smart_append_exclusive( - ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); - for (; k < my_nbr_runs; ++k) { - if (k + 1 < my_nbr_runs) next = src->runs[k + 1]; +#ifdef __cplusplus +using namespace ::roaring::internal; - 
run_container_smart_append_exclusive(ans, buffered.value, - buffered.length); - buffered = next; - } +extern "C" { namespace roaring { namespace api { +#endif - *dst = convert_run_to_efficient_container(ans, &return_typecode); - if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(ans); +#define CROARING_SERIALIZATION_ARRAY_UINT32 1 +#define CROARING_SERIALIZATION_CONTAINER 2 - return return_typecode; +extern inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r); +extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r); +extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow); +extern inline roaring_bitmap_t *roaring_bitmap_create(void); +extern inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, uint64_t max); +extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min, uint64_t max); + +static inline bool is_cow(const roaring_bitmap_t *r) { +return r->high_low_container.flags & ROARING_FLAG_COW; +} +static inline bool is_frozen(const roaring_bitmap_t *r) { +return r->high_low_container.flags & ROARING_FLAG_FROZEN; } -/* end file src/containers/mixed_negation.c */ -/* begin file src/containers/mixed_subset.c */ -bool array_container_is_subset_bitset(const array_container_t* container1, - const bitset_container_t* container2) { - if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { - if (container2->cardinality < container1->cardinality) { - return false; - } - } - for (int i = 0; i < container1->cardinality; ++i) { - if (!bitset_container_contains(container2, container1->array[i])) { - return false; - } - } - return true; +// this is like roaring_bitmap_add, but it populates pointer arguments in such a +// way +// that we can recover the container touched, which, in turn can be used to +// accelerate some functions (when you repeatedly need to add to the same +// container) +static inline container_t *containerptr_roaring_bitmap_add( 
+roaring_bitmap_t *r, uint32_t val, +uint8_t *type, int *index +){ +roaring_array_t *ra = &r->high_low_container; + +uint16_t hb = val >> 16; +const int i = ra_get_index(ra, hb); +if (i >= 0) { +ra_unshare_container_at_index(ra, i); +container_t *c = ra_get_container_at_index(ra, i, type); +uint8_t new_type = *type; +container_t *c2 = container_add(c, val & 0xFFFF, *type, &new_type); +*index = i; +if (c2 != c) { +container_free(c, *type); +ra_set_container_at_index(ra, i, c2, new_type); +*type = new_type; +return c2; +} else { +return c; +} +} else { +array_container_t *new_ac = array_container_create(); +container_t *c = container_add(new_ac, val & 0xFFFF, +ARRAY_CONTAINER_TYPE, type); +// we could just assume that it stays an array container +ra_insert_new_key_value_at(ra, -i - 1, hb, c, *type); +*index = -i - 1; +return c; +} } -bool run_container_is_subset_array(const run_container_t* container1, - const array_container_t* container2) { - if (run_container_cardinality(container1) > container2->cardinality) - return false; - int32_t start_pos = -1, stop_pos = -1; - for (int i = 0; i < container1->n_runs; ++i) { - int32_t start = container1->runs[i].value; - int32_t stop = start + container1->runs[i].length; - start_pos = advanceUntil(container2->array, stop_pos, - container2->cardinality, start); - stop_pos = advanceUntil(container2->array, stop_pos, - container2->cardinality, stop); - if (start_pos == container2->cardinality) { - return false; - } else if (stop_pos - start_pos != stop - start || - container2->array[start_pos] != start || - container2->array[stop_pos] != stop) { - return false; - } - } - return true; +roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) { +roaring_bitmap_t *ans = +(roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t)); +if (!ans) { +return NULL; +} +bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap); +if (!is_ok) { +roaring_free(ans); +return NULL; +} +return ans; +} + +bool 
roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap) { +return ra_init_with_capacity(&r->high_low_container, cap); +} + +static inline void add_bulk_impl(roaring_bitmap_t *r, +roaring_bulk_context_t *context, +uint32_t val) { +uint16_t key = val >> 16; +if (context->container == NULL || context->key != key) { +uint8_t typecode; +int idx; +context->container = containerptr_roaring_bitmap_add( +r, val, &typecode, &idx); +context->typecode = typecode; +context->idx = idx; +context->key = key; +} else { +// no need to seek the container, it is at hand +// because we already have the container at hand, we can do the +// insertion directly, bypassing the roaring_bitmap_add call +uint8_t new_typecode; +container_t *container2 = container_add( +context->container, val & 0xFFFF, context->typecode, &new_typecode); +if (container2 != context->container) { +// rare instance when we need to change the container type +container_free(context->container, context->typecode); +ra_set_container_at_index(&r->high_low_container, context->idx, +container2, new_typecode); +context->typecode = new_typecode; +context->container = container2; +} +} } -bool array_container_is_subset_run(const array_container_t* container1, - const run_container_t* container2) { - if (container1->cardinality > run_container_cardinality(container2)) - return false; - int i_array = 0, i_run = 0; - while (i_array < container1->cardinality && i_run < container2->n_runs) { - uint32_t start = container2->runs[i_run].value; - uint32_t stop = start + container2->runs[i_run].length; - if (container1->array[i_array] < start) { - return false; - } else if (container1->array[i_array] > stop) { - i_run++; - } else { // the value of the array is in the run - i_array++; - } - } - if (i_array == container1->cardinality) { - return true; - } else { - return false; - } +void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, +const uint32_t *vals) { +uint32_t val; +const uint32_t *start = vals; +const 
uint32_t *end = vals + n_args; +const uint32_t *current_val = start; + +if (n_args == 0) { +return; } -bool run_container_is_subset_bitset(const run_container_t* container1, - const bitset_container_t* container2) { - // todo: this code could be much faster - if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { - if (container2->cardinality < run_container_cardinality(container1)) { - return false; - } - } else { - int32_t card = bitset_container_compute_cardinality( - container2); // modify container2? - if (card < run_container_cardinality(container1)) { - return false; - } - } - for (int i = 0; i < container1->n_runs; ++i) { - uint32_t run_start = container1->runs[i].value; - uint32_t le = container1->runs[i].length; - for (uint32_t j = run_start; j <= run_start + le; ++j) { - if (!bitset_container_contains(container2, j)) { - return false; - } - } - } - return true; +uint8_t typecode; +int idx; +container_t *container; +val = *current_val; +container = containerptr_roaring_bitmap_add(r, val, &typecode, &idx); +roaring_bulk_context_t context = {container, idx, (uint16_t)(val >> 16), typecode}; + +for (; current_val != end; current_val++) { +memcpy(&val, current_val, sizeof(val)); +add_bulk_impl(r, &context, val); +} } -bool bitset_container_is_subset_run(const bitset_container_t* container1, - const run_container_t* container2) { - // todo: this code could be much faster - if (container1->cardinality != BITSET_UNKNOWN_CARDINALITY) { - if (container1->cardinality > run_container_cardinality(container2)) { - return false; - } - } - int32_t i_bitset = 0, i_run = 0; - while (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS && - i_run < container2->n_runs) { - uint64_t w = container1->array[i_bitset]; - while (w != 0 && i_run < container2->n_runs) { - uint32_t start = container2->runs[i_run].value; - uint32_t stop = start + container2->runs[i_run].length; - uint64_t t = w & (~w + 1); - uint16_t r = i_bitset * 64 + __builtin_ctzll(w); - if (r < start) { - return 
false; - } else if (r > stop) { - i_run++; - continue; - } else { - w ^= t; - } - } - if (w == 0) { - i_bitset++; - } else { - return false; - } - } - if (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS) { - // terminated iterating on the run containers, check that rest of bitset - // is empty - for (; i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS; i_bitset++) { - if (container1->array[i_bitset] != 0) { - return false; - } - } - } - return true; +void roaring_bitmap_add_bulk(roaring_bitmap_t *r, +roaring_bulk_context_t *context, uint32_t val) { +add_bulk_impl(r, context, val); } -/* end file src/containers/mixed_subset.c */ -/* begin file src/containers/mixed_union.c */ -/* - * mixed_union.c - * - */ -#include -#include +bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r, +roaring_bulk_context_t *context, +uint32_t val) +{ +uint16_t key = val >> 16; +if (context->container == NULL || context->key != key) { +int32_t start_idx = -1; +if (context->container != NULL && context->key < key) { +start_idx = context->idx; +} +int idx = ra_advance_until(&r->high_low_container, key, start_idx); +if (idx == ra_get_size(&r->high_low_container)) { +return false; +} +uint8_t typecode; +context->container = ra_get_container_at_index(&r->high_low_container, idx, &typecode); +context->typecode = typecode; +context->idx = idx; +context->key = ra_get_key_at_index(&r->high_low_container, idx); +// ra_advance_until finds the next key >= the target, we found a later container. +if (context->key != key) { +return false; +} +} +// context is now set up +return container_contains(context->container, val & 0xFFFF, context->typecode); +} +roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) { +roaring_bitmap_t *answer = roaring_bitmap_create(); +roaring_bitmap_add_many(answer, n_args, vals); +return answer; +} -/* Compute the union of src_1 and src_2 and write the result to - * dst. 
*/ -void array_bitset_container_union(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - if (src_2 != dst) bitset_container_copy(src_2, dst); - dst->cardinality = (int32_t)bitset_set_list_withcard( - dst->array, dst->cardinality, src_1->array, src_1->cardinality); +roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) { +// todo: could be greatly optimized but we do not expect this call to ever +// include long lists +roaring_bitmap_t *answer = roaring_bitmap_create(); +roaring_bulk_context_t context = {0}; +va_list ap; +va_start(ap, n_args); +for (size_t i = 0; i < n_args; i++) { +uint32_t val = va_arg(ap, uint32_t); +roaring_bitmap_add_bulk(answer, &context, val); +} +va_end(ap); +return answer; } -/* Compute the union of src_1 and src_2 and write the result to - * dst. It is allowed for src_2 to be dst. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */ -void array_bitset_container_lazy_union(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - if (src_2 != dst) bitset_container_copy(src_2, dst); - bitset_set_list(dst->array, src_1->array, src_1->cardinality); - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; +static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) { +return (a < b) ? a : b; } -void run_bitset_container_union(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - assert(!run_container_is_full(src_1)); // catch this case upstream - if (src_2 != dst) bitset_container_copy(src_2, dst); - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - bitset_set_lenrange(dst->array, rle.value, rle.length); - } - dst->cardinality = bitset_container_compute_cardinality(dst); +static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) { +return (a < b) ? 
a : b; } -void run_bitset_container_lazy_union(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - assert(!run_container_is_full(src_1)); // catch this case upstream - if (src_2 != dst) bitset_container_copy(src_2, dst); - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - bitset_set_lenrange(dst->array, rle.value, rle.length); - } - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; +roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, +uint32_t step) { +if(max >= UINT64_C(0x100000000)) { +max = UINT64_C(0x100000000); +} +if (step == 0) return NULL; +if (max <= min) return NULL; +roaring_bitmap_t *answer = roaring_bitmap_create(); +if (step >= (1 << 16)) { +for (uint32_t value = (uint32_t)min; value < max; value += step) { +roaring_bitmap_add(answer, value); +} +return answer; +} +uint64_t min_tmp = min; +do { +uint32_t key = (uint32_t)min_tmp >> 16; +uint32_t container_min = min_tmp & 0xFFFF; +uint32_t container_max = (uint32_t)minimum_uint64(max - (key << 16), 1 << 16); +uint8_t type; +container_t *container = container_from_range(&type, container_min, +container_max, (uint16_t)step); +ra_append(&answer->high_low_container, key, container, type); +uint32_t gap = container_max - container_min + step - 1; +min_tmp += gap - (gap % step); +} while (min_tmp < max); +// cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step +return answer; +} + +void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max) { +if (min > max) { +return; } -// why do we leave the result as a run container?? 
-void array_run_container_union(const array_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst) { - if (run_container_is_full(src_2)) { - run_container_copy(src_2, dst); - return; - } - // TODO: see whether the "2*" is spurious - run_container_grow(dst, 2 * (src_1->cardinality + src_2->n_runs), false); - int32_t rlepos = 0; - int32_t arraypos = 0; - rle16_t previousrle; - if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { - previousrle = run_container_append_first(dst, src_2->runs[rlepos]); - rlepos++; - } else { - previousrle = - run_container_append_value_first(dst, src_1->array[arraypos]); - arraypos++; - } - while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { - if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { - run_container_append(dst, src_2->runs[rlepos], &previousrle); - rlepos++; - } else { - run_container_append_value(dst, src_1->array[arraypos], - &previousrle); - arraypos++; - } - } - if (arraypos < src_1->cardinality) { - while (arraypos < src_1->cardinality) { - run_container_append_value(dst, src_1->array[arraypos], - &previousrle); - arraypos++; - } - } else { - while (rlepos < src_2->n_runs) { - run_container_append(dst, src_2->runs[rlepos], &previousrle); - rlepos++; - } - } +roaring_array_t *ra = &r->high_low_container; + +uint32_t min_key = min >> 16; +uint32_t max_key = max >> 16; + +int32_t num_required_containers = max_key - min_key + 1; +int32_t suffix_length = count_greater(ra->keys, ra->size, max_key); +int32_t prefix_length = count_less(ra->keys, ra->size - suffix_length, +min_key); +int32_t common_length = ra->size - prefix_length - suffix_length; + +if (num_required_containers > common_length) { +ra_shift_tail(ra, suffix_length, +num_required_containers - common_length); } -void array_run_container_inplace_union(const array_container_t *src_1, - run_container_t *src_2) { - if (run_container_is_full(src_2)) { - return; - } - const int32_t maxoutput = src_1->cardinality + 
src_2->n_runs; - const int32_t neededcapacity = maxoutput + src_2->n_runs; - if (src_2->capacity < neededcapacity) - run_container_grow(src_2, neededcapacity, true); - memmove(src_2->runs + maxoutput, src_2->runs, - src_2->n_runs * sizeof(rle16_t)); - rle16_t *inputsrc2 = src_2->runs + maxoutput; - int32_t rlepos = 0; - int32_t arraypos = 0; - int src2nruns = src_2->n_runs; - src_2->n_runs = 0; - - rle16_t previousrle; - - if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { - previousrle = run_container_append_first(src_2, inputsrc2[rlepos]); - rlepos++; - } else { - previousrle = - run_container_append_value_first(src_2, src_1->array[arraypos]); - arraypos++; - } +int32_t src = prefix_length + common_length - 1; +int32_t dst = ra->size - suffix_length - 1; +for (uint32_t key = max_key; key != min_key-1; key--) { // beware of min_key==0 +uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0; +uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff; +container_t* new_container; +uint8_t new_type; - while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) { - if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { - run_container_append(src_2, inputsrc2[rlepos], &previousrle); - rlepos++; - } else { - run_container_append_value(src_2, src_1->array[arraypos], - &previousrle); - arraypos++; - } - } - if (arraypos < src_1->cardinality) { - while (arraypos < src_1->cardinality) { - run_container_append_value(src_2, src_1->array[arraypos], - &previousrle); - arraypos++; - } - } else { - while (rlepos < src2nruns) { - run_container_append(src_2, inputsrc2[rlepos], &previousrle); - rlepos++; - } - } +if (src >= 0 && ra->keys[src] == key) { +ra_unshare_container_at_index(ra, src); +new_container = container_add_range(ra->containers[src], +ra->typecodes[src], +container_min, container_max, +&new_type); +if (new_container != ra->containers[src]) { +container_free(ra->containers[src], +ra->typecodes[src]); +} +src--; +} else { +new_container = 
container_from_range(&new_type, container_min, +container_max+1, 1); +} +ra_replace_key_and_container_at_index(ra, dst, key, new_container, +new_type); +dst--; +} } -bool array_array_container_union(const array_container_t *src_1, - const array_container_t *src_2, void **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - if (totalCardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_create_given_capacity(totalCardinality); - if (*dst != NULL) { - array_container_union(src_1, src_2, (array_container_t *)*dst); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset - } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); - ourbitset->cardinality = (int32_t)bitset_set_list_withcard( - ourbitset->array, src_1->cardinality, src_2->array, - src_2->cardinality); - if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { - // need to convert! 
- *dst = array_container_from_bitset(ourbitset); - bitset_container_free(ourbitset); - returnval = false; // not going to be a bitset - } - } - return returnval; -} - -bool array_array_container_inplace_union(array_container_t *src_1, - const array_container_t *src_2, void **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - *dst = NULL; - if (totalCardinality <= DEFAULT_MAX_SIZE) { - if(src_1->capacity < totalCardinality) { - *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous - if (*dst != NULL) { - array_container_union(src_1, src_2, (array_container_t *)*dst); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset - } else { - memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t)); - src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality, - src_2->array, src_2->cardinality, src_1->array); - return false; // not a bitset - } - } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); - ourbitset->cardinality = (int32_t)bitset_set_list_withcard( - ourbitset->array, src_1->cardinality, src_2->array, - src_2->cardinality); - if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { - // need to convert! 
- if(src_1->capacity < ourbitset->cardinality) { - array_container_grow(src_1, ourbitset->cardinality, false); - } - - bitset_extract_setbits_uint16(ourbitset->array, BITSET_CONTAINER_SIZE_IN_WORDS, - src_1->array, 0); - src_1->cardinality = ourbitset->cardinality; - *dst = src_1; - bitset_container_free(ourbitset); - returnval = false; // not going to be a bitset - } - } - return returnval; -} - - -bool array_array_container_lazy_union(const array_container_t *src_1, - const array_container_t *src_2, - void **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { - *dst = array_container_create_given_capacity(totalCardinality); - if (*dst != NULL) { - array_container_union(src_1, src_2, (array_container_t *)*dst); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset - } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); - bitset_set_list(ourbitset->array, src_2->array, src_2->cardinality); - ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; - } - return returnval; -} - - -bool array_array_container_lazy_inplace_union(array_container_t *src_1, - const array_container_t *src_2, - void **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - *dst = NULL; - if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { - if(src_1->capacity < totalCardinality) { - *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous - if (*dst != NULL) { - array_container_union(src_1, src_2, (array_container_t *)*dst); - } else { - return true; // otherwise failure won't be caught - } - return false; // not a bitset - } else { - memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t)); - 
src_1->cardinality = (int32_t)union_uint16(src_1->array + src_2->cardinality, src_1->cardinality, - src_2->array, src_2->cardinality, src_1->array); - return false; // not a bitset - } - } - *dst = bitset_container_create(); - bool returnval = true; // expect a bitset - if (*dst != NULL) { - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); - bitset_set_list(ourbitset->array, src_2->array, src_2->cardinality); - ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; - } - return returnval; +void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max) { +if (min > max) { +return; } -/* end file src/containers/mixed_union.c */ -/* begin file src/containers/mixed_xor.c */ -/* - * mixed_xor.c - */ -#include -#include +roaring_array_t *ra = &r->high_low_container; +uint32_t min_key = min >> 16; +uint32_t max_key = max >> 16; -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). - * Result is true iff dst is a bitset */ -bool array_bitset_container_xor(const array_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bitset_container_t *result = bitset_container_create(); - bitset_container_copy(src_2, result); - result->cardinality = (int32_t)bitset_flip_list_withcard( - result->array, result->cardinality, src_1->array, src_1->cardinality); - - // do required type conversions. - if (result->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(result); - bitset_container_free(result); - return false; // not bitset - } - *dst = result; - return true; // bitset +int32_t src = count_less(ra->keys, ra->size, min_key); +int32_t dst = src; +while (src < ra->size && ra->keys[src] <= max_key) { +uint32_t container_min = (min_key == ra->keys[src]) ? (min & 0xffff) : 0; +uint32_t container_max = (max_key == ra->keys[src]) ? 
(max & 0xffff) : 0xffff; +ra_unshare_container_at_index(ra, src); +container_t *new_container; +uint8_t new_type; +new_container = container_remove_range(ra->containers[src], +ra->typecodes[src], +container_min, container_max, +&new_type); +if (new_container != ra->containers[src]) { +container_free(ra->containers[src], +ra->typecodes[src]); +} +if (new_container) { +ra_replace_key_and_container_at_index(ra, dst, ra->keys[src], +new_container, new_type); +dst++; +} +src++; +} +if (src > dst) { +ra_shift_tail(ra, ra->size - src, dst - src); +} } -/* Compute the xor of src_1 and src_2 and write the result to - * dst. It is allowed for src_2 to be dst. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). - */ +void roaring_bitmap_printf(const roaring_bitmap_t *r) { +const roaring_array_t *ra = &r->high_low_container; -void array_bitset_container_lazy_xor(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - if (src_2 != dst) bitset_container_copy(src_2, dst); - bitset_flip_list(dst->array, src_1->array, src_1->cardinality); - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; +printf("{"); +for (int i = 0; i < ra->size; ++i) { +container_printf_as_uint32_array(ra->containers[i], ra->typecodes[i], +((uint32_t)ra->keys[i]) << 16); + +if (i + 1 < ra->size) { +printf(","); +} +} +printf("}"); } -/* Compute the xor of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. 
- */ +void roaring_bitmap_printf_describe(const roaring_bitmap_t *r) { +const roaring_array_t *ra = &r->high_low_container; -bool run_bitset_container_xor(const run_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bitset_container_t *result = bitset_container_create(); +printf("{"); +for (int i = 0; i < ra->size; ++i) { +printf("%d: %s (%d)", ra->keys[i], +get_full_container_name(ra->containers[i], ra->typecodes[i]), +container_get_cardinality(ra->containers[i], ra->typecodes[i])); +if (ra->typecodes[i] == SHARED_CONTAINER_TYPE) { +printf("(shared count = %" PRIu32 " )", +croaring_refcount_get( +&(CAST_shared(ra->containers[i])->counter))); +} - bitset_container_copy(src_2, result); - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - bitset_flip_range(result->array, rle.value, - rle.value + rle.length + UINT32_C(1)); - } - result->cardinality = bitset_container_compute_cardinality(result); +if (i + 1 < ra->size) { +printf(", "); +} +} +printf("}"); +} - if (result->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(result); - bitset_container_free(result); - return false; // not bitset - } - *dst = result; - return true; // bitset +typedef struct min_max_sum_s { +uint32_t min; +uint32_t max; +uint64_t sum; +} min_max_sum_t; + +static bool min_max_sum_fnc(uint32_t value, void *param) { +min_max_sum_t *mms = (min_max_sum_t *)param; +if (value > mms->max) mms->max = value; +if (value < mms->min) mms->min = value; +mms->sum += value; +return true; // we always process all data points } -/* lazy xor. Dst is initialized and may be equal to src_2. - * Result is left as a bitset container, even if actual - * cardinality would dictate an array container. +/** +* (For advanced users.) 
+* Collect statistics about the bitmap +*/ +void roaring_bitmap_statistics(const roaring_bitmap_t *r, +roaring_statistics_t *stat) { +const roaring_array_t *ra = &r->high_low_container; + +memset(stat, 0, sizeof(*stat)); +stat->n_containers = ra->size; +stat->cardinality = roaring_bitmap_get_cardinality(r); +min_max_sum_t mms; +mms.min = UINT32_C(0xFFFFFFFF); +mms.max = UINT32_C(0); +mms.sum = 0; +roaring_iterate(r, &min_max_sum_fnc, &mms); +stat->min_value = mms.min; +stat->max_value = mms.max; +stat->sum_value = mms.sum; + +for (int i = 0; i < ra->size; ++i) { +uint8_t truetype = +get_container_type(ra->containers[i], ra->typecodes[i]); +uint32_t card = +container_get_cardinality(ra->containers[i], ra->typecodes[i]); +uint32_t sbytes = +container_size_in_bytes(ra->containers[i], ra->typecodes[i]); +switch (truetype) { +case BITSET_CONTAINER_TYPE: +stat->n_bitset_containers++; +stat->n_values_bitset_containers += card; +stat->n_bytes_bitset_containers += sbytes; +break; +case ARRAY_CONTAINER_TYPE: +stat->n_array_containers++; +stat->n_values_array_containers += card; +stat->n_bytes_array_containers += sbytes; +break; +case RUN_CONTAINER_TYPE: +stat->n_run_containers++; +stat->n_values_run_containers += card; +stat->n_bytes_run_containers += sbytes; +break; +default: +assert(false); +roaring_unreachable; +} +} +} + +/* + * Checks that: + * - Array containers are sorted and contain no duplicates + * - Range containers are sorted and contain no overlapping ranges + * - Roaring containers are sorted by key and there are no duplicate keys + * - The correct container type is use for each container (e.g. 
bitmaps aren't used for small containers) */ +bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, const char **reason) { +const char *reason_local; +if (reason == NULL) { +// Always allow assigning through *reason +reason = &reason_local; +} +*reason = NULL; +const roaring_array_t *ra = &r->high_low_container; +if (ra->size < 0) { +*reason = "negative size"; +return false; +} +if (ra->allocation_size < 0) { +*reason = "negative allocation size"; +return false; +} +if (ra->size > ra->allocation_size) { +*reason = "more containers than allocated space"; +return false; +} +if (ra->flags & ~(ROARING_FLAG_COW | ROARING_FLAG_FROZEN)) { +*reason = "invalid flags"; +return false; +} +if (ra->size == 0) { +return true; +} -void run_bitset_container_lazy_xor(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst) { - if (src_2 != dst) bitset_container_copy(src_2, dst); - for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { - rle16_t rle = src_1->runs[rlepos]; - bitset_flip_range(dst->array, rle.value, - rle.value + rle.length + UINT32_C(1)); - } - dst->cardinality = BITSET_UNKNOWN_CARDINALITY; +if (ra->keys == NULL) { +*reason = "keys is NULL"; +return false; +} +if (ra->typecodes == NULL) { +*reason = "typecodes is NULL"; +return false; +} +if (ra->containers == NULL) { +*reason = "containers is NULL"; +return false; } -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. 
- */ +uint32_t prev_key = ra->keys[0]; +for (int32_t i = 1; i < ra->size; ++i) { +if (ra->keys[i] <= prev_key) { +*reason = "keys not strictly increasing"; +return false; +} +prev_key = ra->keys[i]; +} -int array_run_container_xor(const array_container_t *src_1, - const run_container_t *src_2, void **dst) { - // semi following Java XOR implementation as of May 2016 - // the C OR implementation works quite differently and can return a run - // container - // TODO could optimize for full run containers. - - // use of lazy following Java impl. - const int arbitrary_threshold = 32; - if (src_1->cardinality < arbitrary_threshold) { - run_container_t *ans = run_container_create(); - array_run_container_lazy_xor(src_1, src_2, ans); // keeps runs. - uint8_t typecode_after; - *dst = - convert_run_to_efficient_container_and_free(ans, &typecode_after); - return typecode_after; - } +for (int32_t i = 0; i < ra->size; ++i) { +if (!container_internal_validate(ra->containers[i], ra->typecodes[i], reason)) { +// reason should already be set +if (*reason == NULL) { +*reason = "container failed to validate but no reason given"; +} +return false; +} +} - int card = run_container_cardinality(src_2); - if (card <= DEFAULT_MAX_SIZE) { - // Java implementation works with the array, xoring the run elements via - // iterator - array_container_t *temp = array_container_from_run(src_2); - bool ret_is_bitset = array_array_container_xor(temp, src_1, dst); - array_container_free(temp); - return ret_is_bitset ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - } else { // guess that it will end up as a bitset - bitset_container_t *result = bitset_container_from_run(src_2); - bool is_bitset = bitset_array_container_ixor(result, src_1, dst); - // any necessary type conversion has been done by the ixor - int retval = (is_bitset ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE); - return retval; - } +return true; } -/* Dst is a valid run container. (Can it be src_2? 
Let's say not.) - * Leaves result as run container, even if other options are - * smaller. - */ +roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) { +roaring_bitmap_t *ans = +(roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t)); +if (!ans) { +return NULL; +} +if (!ra_init_with_capacity( // allocation of list of containers can fail +&ans->high_low_container, r->high_low_container.size) +){ +roaring_free(ans); +return NULL; +} +if (!ra_overwrite( // memory allocation of individual containers may fail +&r->high_low_container, &ans->high_low_container, is_cow(r)) +){ +roaring_bitmap_free(ans); // overwrite should leave in freeable state +return NULL; +} +roaring_bitmap_set_copy_on_write(ans, is_cow(r)); +return ans; +} + +bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, +const roaring_bitmap_t *src) { +roaring_bitmap_set_copy_on_write(dest, is_cow(src)); +return ra_overwrite(&src->high_low_container, &dest->high_low_container, +is_cow(src)); +} + +void roaring_bitmap_free(const roaring_bitmap_t *r) { +if(r == NULL) { return; } +if (!is_frozen(r)) { +ra_clear((roaring_array_t*)&r->high_low_container); +} +roaring_free((roaring_bitmap_t*)r); +} -void array_run_container_lazy_xor(const array_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst) { - run_container_grow(dst, src_1->cardinality + src_2->n_runs, false); - int32_t rlepos = 0; - int32_t arraypos = 0; - dst->n_runs = 0; - - while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { - if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { - run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, - src_2->runs[rlepos].length); - rlepos++; - } else { - run_container_smart_append_exclusive(dst, src_1->array[arraypos], - 0); - arraypos++; - } - } - while (arraypos < src_1->cardinality) { - run_container_smart_append_exclusive(dst, src_1->array[arraypos], 0); - arraypos++; - } - while (rlepos < src_2->n_runs) { - 
run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, - src_2->runs[rlepos].length); - rlepos++; - } +void roaring_bitmap_clear(roaring_bitmap_t *r) { +ra_reset(&r->high_low_container); } -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ +void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) { +roaring_array_t *ra = &r->high_low_container; + +const uint16_t hb = val >> 16; +const int i = ra_get_index(ra, hb); +uint8_t typecode; +if (i >= 0) { +ra_unshare_container_at_index(ra, i); +container_t *container = +ra_get_container_at_index(ra, i, &typecode); +uint8_t newtypecode = typecode; +container_t *container2 = +container_add(container, val & 0xFFFF, typecode, &newtypecode); +if (container2 != container) { +container_free(container, typecode); +ra_set_container_at_index(&r->high_low_container, i, container2, +newtypecode); +} +} else { +array_container_t *newac = array_container_create(); +container_t *container = container_add(newac, val & 0xFFFF, +ARRAY_CONTAINER_TYPE, &typecode); +// we could just assume that it stays an array container +ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, +container, typecode); +} +} -int run_run_container_xor(const run_container_t *src_1, - const run_container_t *src_2, void **dst) { - run_container_t *ans = run_container_create(); - run_container_xor(src_1, src_2, ans); - uint8_t typecode_after; - *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after); - return typecode_after; +bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) { +const uint16_t hb = val >> 16; +const int i = ra_get_index(&r->high_low_container, hb); +uint8_t typecode; +bool result = false; +if (i >= 0) { +ra_unshare_container_at_index(&r->high_low_container, i); +container_t *container = +ra_get_container_at_index(&r->high_low_container, i, &typecode); + +const int oldCardinality = +container_get_cardinality(container, typecode); + 
+uint8_t newtypecode = typecode; +container_t *container2 = +container_add(container, val & 0xFFFF, typecode, &newtypecode); +if (container2 != container) { +container_free(container, typecode); +ra_set_container_at_index(&r->high_low_container, i, container2, +newtypecode); +result = true; +} else { +const int newCardinality = +container_get_cardinality(container, newtypecode); + +result = oldCardinality != newCardinality; +} +} else { +array_container_t *newac = array_container_create(); +container_t *container = container_add(newac, val & 0xFFFF, +ARRAY_CONTAINER_TYPE, &typecode); +// we could just assume that it stays an array container +ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, +container, typecode); +result = true; +} + +return result; } -/* - * Java implementation (as of May 2016) for array_run, run_run - * and bitset_run don't do anything different for inplace. - * Could adopt the mixed_union.c approach instead (ie, using - * smart_append_exclusive) - * - */ +void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) { +const uint16_t hb = val >> 16; +const int i = ra_get_index(&r->high_low_container, hb); +uint8_t typecode; +if (i >= 0) { +ra_unshare_container_at_index(&r->high_low_container, i); +container_t *container = +ra_get_container_at_index(&r->high_low_container, i, &typecode); +uint8_t newtypecode = typecode; +container_t *container2 = +container_remove(container, val & 0xFFFF, typecode, &newtypecode); +if (container2 != container) { +container_free(container, typecode); +ra_set_container_at_index(&r->high_low_container, i, container2, +newtypecode); +} +if (container_get_cardinality(container2, newtypecode) != 0) { +ra_set_container_at_index(&r->high_low_container, i, container2, +newtypecode); +} else { +ra_remove_at_index_and_free(&r->high_low_container, i); +} +} +} -bool array_array_container_xor(const array_container_t *src_1, - const array_container_t *src_2, void **dst) { - int totalCardinality = - 
src_1->cardinality + src_2->cardinality; // upper bound - if (totalCardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_create_given_capacity(totalCardinality); - array_container_xor(src_1, src_2, (array_container_t *)*dst); - return false; // not a bitset - } - *dst = bitset_container_from_array(src_1); - bool returnval = true; // expect a bitset - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - ourbitset->cardinality = (uint32_t)bitset_flip_list_withcard( - ourbitset->array, src_1->cardinality, src_2->array, src_2->cardinality); - if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { - // need to convert! - *dst = array_container_from_bitset(ourbitset); - bitset_container_free(ourbitset); - returnval = false; // not going to be a bitset - } +bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) { +const uint16_t hb = val >> 16; +const int i = ra_get_index(&r->high_low_container, hb); +uint8_t typecode; +bool result = false; +if (i >= 0) { +ra_unshare_container_at_index(&r->high_low_container, i); +container_t *container = +ra_get_container_at_index(&r->high_low_container, i, &typecode); - return returnval; +const int oldCardinality = +container_get_cardinality(container, typecode); + +uint8_t newtypecode = typecode; +container_t *container2 = +container_remove(container, val & 0xFFFF, typecode, &newtypecode); +if (container2 != container) { +container_free(container, typecode); +ra_set_container_at_index(&r->high_low_container, i, container2, +newtypecode); } -bool array_array_container_lazy_xor(const array_container_t *src_1, - const array_container_t *src_2, - void **dst) { - int totalCardinality = src_1->cardinality + src_2->cardinality; - // upper bound, but probably poor estimate for xor - if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { - *dst = array_container_create_given_capacity(totalCardinality); - if (*dst != NULL) - array_container_xor(src_1, src_2, (array_container_t *)*dst); - return false; // not a bitset - } - 
*dst = bitset_container_from_array(src_1); - bool returnval = true; // expect a bitset (maybe, for XOR??) - if (*dst != NULL) { - bitset_container_t *ourbitset = (bitset_container_t *)*dst; - bitset_flip_list(ourbitset->array, src_2->array, src_2->cardinality); - ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; - } - return returnval; +const int newCardinality = +container_get_cardinality(container2, newtypecode); + +if (newCardinality != 0) { +ra_set_container_at_index(&r->high_low_container, i, container2, +newtypecode); +} else { +ra_remove_at_index_and_free(&r->high_low_container, i); } -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). Return value is - * "dst is a bitset" - */ +result = oldCardinality != newCardinality; +} +return result; +} -bool bitset_bitset_container_xor(const bitset_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bitset_container_t *ans = bitset_container_create(); - int card = bitset_container_xor(src_1, src_2, ans); - if (card <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(ans); - bitset_container_free(ans); - return false; // not bitset - } else { - *dst = ans; - return true; - } +void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, +const uint32_t *vals) { +if (n_args == 0 || r->high_low_container.size == 0) { +return; +} +int32_t pos = -1; // position of the container used in the previous iteration +for (size_t i = 0; i < n_args; i++) { +uint16_t key = (uint16_t)(vals[i] >> 16); +if (pos < 0 || key != r->high_low_container.keys[pos]) { +pos = ra_get_index(&r->high_low_container, key); +} +if (pos >= 0) { +uint8_t new_typecode; +container_t *new_container; +new_container = container_remove(r->high_low_container.containers[pos], +vals[i] & 0xffff, +r->high_low_container.typecodes[pos], +&new_typecode); +if (new_container != r->high_low_container.containers[pos]) { +container_free(r->high_low_container.containers[pos], 
+r->high_low_container.typecodes[pos]); +ra_replace_key_and_container_at_index(&r->high_low_container, +pos, key, new_container, +new_typecode); +} +if (!container_nonzero_cardinality(new_container, new_typecode)) { +container_free(new_container, new_typecode); +ra_remove_at_index(&r->high_low_container, pos); +pos = -1; +} +} +} } -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ +// there should be some SIMD optimizations possible here +roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, +const roaring_bitmap_t *x2) { +uint8_t result_type = 0; +const int length1 = x1->high_low_container.size, +length2 = x2->high_low_container.size; +uint32_t neededcap = length1 > length2 ? length2 : length1; +roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); +roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); -bool bitset_array_container_ixor(bitset_container_t *src_1, - const array_container_t *src_2, void **dst) { - *dst = src_1; - src_1->cardinality = (uint32_t)bitset_flip_list_withcard( - src_1->array, src_1->cardinality, src_2->array, src_2->cardinality); +int pos1 = 0, pos2 = 0; - if (src_1->cardinality <= DEFAULT_MAX_SIZE) { - *dst = array_container_from_bitset(src_1); - bitset_container_free(src_1); - return false; // not bitset - } else - return true; -} +while (pos1 < length1 && pos2 < length2) { +const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); -/* a bunch of in-place, some of which may not *really* be inplace. 
- * TODO: write actual inplace routine if efficiency warrants it - * Anything inplace with a bitset is a good candidate - */ +if (s1 == s2) { +uint8_t type1, type2; +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +container_t *c = container_and(c1, type1, c2, type2, &result_type); -bool bitset_bitset_container_ixor(bitset_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bool ans = bitset_bitset_container_xor(src_1, src_2, dst); - bitset_container_free(src_1); - return ans; +if (container_nonzero_cardinality(c, result_type)) { +ra_append(&answer->high_low_container, s1, c, result_type); +} else { +container_free(c, result_type); // otherwise: memory leak! } - -bool array_bitset_container_ixor(array_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bool ans = array_bitset_container_xor(src_1, src_2, dst); - array_container_free(src_1); - return ans; +++pos1; +++pos2; +} else if (s1 < s2) { // s1 < s2 +pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); +} else { // s1 > s2 +pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); +} +} +return answer; } -/* Compute the xor of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. +/** + * Compute the union of 'number' bitmaps. 
*/ - -bool run_bitset_container_ixor(run_container_t *src_1, - const bitset_container_t *src_2, void **dst) { - bool ans = run_bitset_container_xor(src_1, src_2, dst); - run_container_free(src_1); - return ans; +roaring_bitmap_t *roaring_bitmap_or_many(size_t number, +const roaring_bitmap_t **x) { +if (number == 0) { +return roaring_bitmap_create(); } - -bool bitset_run_container_ixor(bitset_container_t *src_1, - const run_container_t *src_2, void **dst) { - bool ans = run_bitset_container_xor(src_2, src_1, dst); - bitset_container_free(src_1); - return ans; +if (number == 1) { +return roaring_bitmap_copy(x[0]); +} +roaring_bitmap_t *answer = +roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION); +for (size_t i = 2; i < number; i++) { +roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION); +} +roaring_bitmap_repair_after_lazy(answer); +return answer; } -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. +/** + * Compute the xor of 'number' bitmaps. */ +roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, +const roaring_bitmap_t **x) { +if (number == 0) { +return roaring_bitmap_create(); +} +if (number == 1) { +return roaring_bitmap_copy(x[0]); +} +roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]); +for (size_t i = 2; i < number; i++) { +roaring_bitmap_lazy_xor_inplace(answer, x[i]); +} +roaring_bitmap_repair_after_lazy(answer); +return answer; +} -int array_run_container_ixor(array_container_t *src_1, - const run_container_t *src_2, void **dst) { - int ans = array_run_container_xor(src_1, src_2, dst); - array_container_free(src_1); - return ans; +// inplace and (modifies its first argument). 
+void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, +const roaring_bitmap_t *x2) { +if (x1 == x2) return; +int pos1 = 0, pos2 = 0, intersection_size = 0; +const int length1 = ra_get_size(&x1->high_low_container); +const int length2 = ra_get_size(&x2->high_low_container); + +// any skipped-over or newly emptied containers in x1 +// have to be freed. +while (pos1 < length1 && pos2 < length2) { +const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + +if (s1 == s2) { +uint8_t type1, type2, result_type; +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); + +// We do the computation "in place" only when c1 is not a shared container. +// Rationale: using a shared container safely with in place computation would +// require making a copy and then doing the computation in place which is likely +// less efficient than avoiding in place entirely and always generating a new +// container. +container_t *c = +(type1 == SHARED_CONTAINER_TYPE) +? 
container_and(c1, type1, c2, type2, &result_type) +: container_iand(c1, type1, c2, type2, &result_type); + +if (c != c1) { // in this instance a new container was created, and +// we need to free the old one +container_free(c1, type1); +} +if (container_nonzero_cardinality(c, result_type)) { +ra_replace_key_and_container_at_index(&x1->high_low_container, +intersection_size, s1, c, +result_type); +intersection_size++; +} else { +container_free(c, result_type); +} +++pos1; +++pos2; +} else if (s1 < s2) { +pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1); +} else { // s1 > s2 +pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); +} +} + +// if we ended early because x2 ran out, then all remaining in x1 should be +// freed +while (pos1 < length1) { +container_free(x1->high_low_container.containers[pos1], +x1->high_low_container.typecodes[pos1]); +++pos1; +} + +// all containers after this have either been copied or freed +ra_downsize(&x1->high_low_container, intersection_size); } -int run_array_container_ixor(run_container_t *src_1, - const array_container_t *src_2, void **dst) { - int ans = array_run_container_xor(src_2, src_1, dst); - run_container_free(src_1); - return ans; +roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, +const roaring_bitmap_t *x2) { +uint8_t result_type = 0; +const int length1 = x1->high_low_container.size, +length2 = x2->high_low_container.size; +if (0 == length1) { +return roaring_bitmap_copy(x2); +} +if (0 == length2) { +return roaring_bitmap_copy(x1); +} +roaring_bitmap_t *answer = +roaring_bitmap_create_with_capacity(length1 + length2); +roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); +int pos1 = 0, pos2 = 0; +uint8_t type1, type2; +uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +while (true) { +if (s1 == s2) { +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); 
+container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +container_t *c = container_or(c1, type1, c2, type2, &result_type); + +// since we assume that the initial containers are non-empty, the +// result here +// can only be non-empty +ra_append(&answer->high_low_container, s1, c, result_type); +++pos1; +++pos2; +if (pos1 == length1) break; +if (pos2 == length2) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + +} else if (s1 < s2) { // s1 < s2 +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +// c1 = container_clone(c1, type1); +c1 = get_copy_of_container(c1, &type1, is_cow(x1)); +if (is_cow(x1)) { +ra_set_container_at_index(&x1->high_low_container, pos1, c1, +type1); +} +ra_append(&answer->high_low_container, s1, c1, type1); +pos1++; +if (pos1 == length1) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + +} else { // s1 > s2 +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +// c2 = container_clone(c2, type2); +c2 = get_copy_of_container(c2, &type2, is_cow(x2)); +if (is_cow(x2)) { +ra_set_container_at_index(&x2->high_low_container, pos2, c2, +type2); +} +ra_append(&answer->high_low_container, s2, c2, type2); +pos2++; +if (pos2 == length2) break; +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +} +} +if (pos1 == length1) { +ra_append_copy_range(&answer->high_low_container, +&x2->high_low_container, pos2, length2, +is_cow(x2)); +} else if (pos2 == length2) { +ra_append_copy_range(&answer->high_low_container, +&x1->high_low_container, pos1, length1, +is_cow(x1)); +} +return answer; } -bool array_array_container_ixor(array_container_t *src_1, - const array_container_t *src_2, void **dst) { - bool ans = array_array_container_xor(src_1, src_2, dst); - array_container_free(src_1); - return ans; +// inplace or (modifies its first argument). 
+void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, +const roaring_bitmap_t *x2) { +uint8_t result_type = 0; +int length1 = x1->high_low_container.size; +const int length2 = x2->high_low_container.size; + +if (0 == length2) return; + +if (0 == length1) { +roaring_bitmap_overwrite(x1, x2); +return; +} +int pos1 = 0, pos2 = 0; +uint8_t type1, type2; +uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +while (true) { +if (s1 == s2) { +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +if (!container_is_full(c1, type1)) { +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +container_t *c = +(type1 == SHARED_CONTAINER_TYPE) +? container_or(c1, type1, c2, type2, &result_type) +: container_ior(c1, type1, c2, type2, &result_type); + +if (c != c1) { // in this instance a new container was created, +// and we need to free the old one +container_free(c1, type1); +} +ra_set_container_at_index(&x1->high_low_container, pos1, c, +result_type); +} +++pos1; +++pos2; +if (pos1 == length1) break; +if (pos2 == length2) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + +} else if (s1 < s2) { // s1 < s2 +pos1++; +if (pos1 == length1) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + +} else { // s1 > s2 +container_t *c2 = ra_get_container_at_index(&x2->high_low_container, +pos2, &type2); +c2 = get_copy_of_container(c2, &type2, is_cow(x2)); +if (is_cow(x2)) { +ra_set_container_at_index(&x2->high_low_container, pos2, c2, +type2); +} + +// container_t *c2_clone = container_clone(c2, type2); +ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, +type2); +pos1++; +length1++; +pos2++; +if (pos2 == length2) break; +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +} +} +if (pos1 == length1) { 
+ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, +pos2, length2, is_cow(x2)); +} } -int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2, - void **dst) { - int ans = run_run_container_xor(src_1, src_2, dst); - run_container_free(src_1); - return ans; +roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, +const roaring_bitmap_t *x2) { +uint8_t result_type = 0; +const int length1 = x1->high_low_container.size, +length2 = x2->high_low_container.size; +if (0 == length1) { +return roaring_bitmap_copy(x2); +} +if (0 == length2) { +return roaring_bitmap_copy(x1); +} +roaring_bitmap_t *answer = +roaring_bitmap_create_with_capacity(length1 + length2); +roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); +int pos1 = 0, pos2 = 0; +uint8_t type1, type2; +uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +while (true) { +if (s1 == s2) { +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +container_t *c = container_xor(c1, type1, c2, type2, &result_type); + +if (container_nonzero_cardinality(c, result_type)) { +ra_append(&answer->high_low_container, s1, c, result_type); +} else { +container_free(c, result_type); +} +++pos1; +++pos2; +if (pos1 == length1) break; +if (pos2 == length2) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + +} else if (s1 < s2) { // s1 < s2 +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +c1 = get_copy_of_container(c1, &type1, is_cow(x1)); +if (is_cow(x1)) { +ra_set_container_at_index(&x1->high_low_container, pos1, c1, +type1); +} +ra_append(&answer->high_low_container, s1, c1, type1); +pos1++; +if (pos1 == length1) break; +s1 = 
ra_get_key_at_index(&x1->high_low_container, pos1); + +} else { // s1 > s2 +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +c2 = get_copy_of_container(c2, &type2, is_cow(x2)); +if (is_cow(x2)) { +ra_set_container_at_index(&x2->high_low_container, pos2, c2, +type2); +} +ra_append(&answer->high_low_container, s2, c2, type2); +pos2++; +if (pos2 == length2) break; +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +} +} +if (pos1 == length1) { +ra_append_copy_range(&answer->high_low_container, +&x2->high_low_container, pos2, length2, +is_cow(x2)); +} else if (pos2 == length2) { +ra_append_copy_range(&answer->high_low_container, +&x1->high_low_container, pos1, length1, +is_cow(x1)); +} +return answer; } -/* end file src/containers/mixed_xor.c */ -/* begin file src/containers/run.c */ -#include -#include +// inplace xor (modifies its first argument). -extern inline uint16_t run_container_minimum(const run_container_t *run); -extern inline uint16_t run_container_maximum(const run_container_t *run); -extern inline int32_t interleavedBinarySearch(const rle16_t *array, - int32_t lenarray, uint16_t ikey); -extern inline bool run_container_contains(const run_container_t *run, - uint16_t pos); -extern inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x); -extern inline bool run_container_is_full(const run_container_t *run); -extern inline bool run_container_nonzero_cardinality(const run_container_t *r); -extern inline void run_container_clear(run_container_t *run); -extern inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs); -extern inline run_container_t *run_container_create_range(uint32_t start, - uint32_t stop); +void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, +const roaring_bitmap_t *x2) { +assert(x1 != x2); +uint8_t result_type = 0; +int length1 = x1->high_low_container.size; +const int length2 = x2->high_low_container.size; -bool run_container_add(run_container_t *run, 
uint16_t pos) { - int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); - if (index >= 0) return false; // already there - index = -index - 2; // points to preceding value, possibly -1 - if (index >= 0) { // possible match - int32_t offset = pos - run->runs[index].value; - int32_t le = run->runs[index].length; - if (offset <= le) return false; // already there - if (offset == le + 1) { - // we may need to fuse - if (index + 1 < run->n_runs) { - if (run->runs[index + 1].value == pos + 1) { - // indeed fusion is needed - run->runs[index].length = run->runs[index + 1].value + - run->runs[index + 1].length - - run->runs[index].value; - recoverRoomAtIndex(run, (uint16_t)(index + 1)); - return true; - } - } - run->runs[index].length++; - return true; - } - if (index + 1 < run->n_runs) { - // we may need to fuse - if (run->runs[index + 1].value == pos + 1) { - // indeed fusion is needed - run->runs[index + 1].value = pos; - run->runs[index + 1].length = run->runs[index + 1].length + 1; - return true; - } - } - } - if (index == -1) { - // we may need to extend the first run - if (0 < run->n_runs) { - if (run->runs[0].value == pos + 1) { - run->runs[0].length++; - run->runs[0].value--; - return true; - } - } - } - makeRoomAtIndex(run, (uint16_t)(index + 1)); - run->runs[index + 1].value = pos; - run->runs[index + 1].length = 0; - return true; -} +if (0 == length2) return; -/* Create a new run container. Return NULL in case of failure. */ -run_container_t *run_container_create_given_capacity(int32_t size) { - run_container_t *run; - /* Allocate the run container itself. 
*/ - if ((run = (run_container_t *)malloc(sizeof(run_container_t))) == NULL) { - return NULL; - } - if (size <= 0 ) { // we don't want to rely on malloc(0) - run->runs = NULL; - } else if ((run->runs = (rle16_t *)malloc(sizeof(rle16_t) * size)) == NULL) { - free(run); - return NULL; - } - run->capacity = size; - run->n_runs = 0; - return run; +if (0 == length1) { +roaring_bitmap_overwrite(x1, x2); +return; } -int run_container_shrink_to_fit(run_container_t *src) { - if (src->n_runs == src->capacity) return 0; // nothing to do - int savings = src->capacity - src->n_runs; - src->capacity = src->n_runs; - rle16_t *oldruns = src->runs; - src->runs = (rle16_t *)realloc(oldruns, src->capacity * sizeof(rle16_t)); - if (src->runs == NULL) free(oldruns); // should never happen? - return savings; +// XOR can have new containers inserted from x2, but can also +// lose containers when x1 and x2 are nonempty and identical. + +int pos1 = 0, pos2 = 0; +uint8_t type1, type2; +uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +while (true) { +if (s1 == s2) { +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); + +// We do the computation "in place" only when c1 is not a shared container. +// Rationale: using a shared container safely with in place computation would +// require making a copy and then doing the computation in place which is likely +// less efficient than avoiding in place entirely and always generating a new +// container. + +container_t *c; +if (type1 == SHARED_CONTAINER_TYPE) { +c = container_xor(c1, type1, c2, type2, &result_type); +shared_container_free(CAST_shared(c1)); // so release } -/* Create a new run container. Return NULL in case of failure. 
*/ -run_container_t *run_container_create(void) { - return run_container_create_given_capacity(RUN_DEFAULT_INIT_SIZE); +else { +c = container_ixor(c1, type1, c2, type2, &result_type); } -run_container_t *run_container_clone(const run_container_t *src) { - run_container_t *run = run_container_create_given_capacity(src->capacity); - if (run == NULL) return NULL; - run->capacity = src->capacity; - run->n_runs = src->n_runs; - memcpy(run->runs, src->runs, src->n_runs * sizeof(rle16_t)); - return run; +if (container_nonzero_cardinality(c, result_type)) { +ra_set_container_at_index(&x1->high_low_container, pos1, c, +result_type); +++pos1; +} else { +container_free(c, result_type); +ra_remove_at_index(&x1->high_low_container, pos1); +--length1; } -/* Free memory. */ -void run_container_free(run_container_t *run) { - if(run->runs != NULL) {// Jon Strabala reports that some tools complain otherwise - free(run->runs); - run->runs = NULL; // pedantic - } - free(run); +++pos2; +if (pos1 == length1) break; +if (pos2 == length2) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + +} else if (s1 < s2) { // s1 < s2 +pos1++; +if (pos1 == length1) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + +} else { // s1 > s2 +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +c2 = get_copy_of_container(c2, &type2, is_cow(x2)); +if (is_cow(x2)) { +ra_set_container_at_index(&x2->high_low_container, pos2, c2, +type2); } -void run_container_grow(run_container_t *run, int32_t min, bool copy) { - int32_t newCapacity = - (run->capacity == 0) - ? RUN_DEFAULT_INIT_SIZE - : run->capacity < 64 ? run->capacity * 2 - : run->capacity < 1024 ? 
run->capacity * 3 / 2 - : run->capacity * 5 / 4; - if (newCapacity < min) newCapacity = min; - run->capacity = newCapacity; - assert(run->capacity >= min); - if (copy) { - rle16_t *oldruns = run->runs; - run->runs = - (rle16_t *)realloc(oldruns, run->capacity * sizeof(rle16_t)); - if (run->runs == NULL) free(oldruns); - } else { - // Jon Strabala reports that some tools complain otherwise - if (run->runs != NULL) { - free(run->runs); - } - run->runs = (rle16_t *)malloc(run->capacity * sizeof(rle16_t)); - } - // handle the case where realloc fails - if (run->runs == NULL) { - fprintf(stderr, "could not allocate memory\n"); - } - assert(run->runs != NULL); +ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, +type2); +pos1++; +length1++; +pos2++; +if (pos2 == length2) break; +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +} +} +if (pos1 == length1) { +ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, +pos2, length2, is_cow(x2)); +} } -/* copy one container into another */ -void run_container_copy(const run_container_t *src, run_container_t *dst) { - const int32_t n_runs = src->n_runs; - if (src->n_runs > dst->capacity) { - run_container_grow(dst, n_runs, false); - } - dst->n_runs = n_runs; - memcpy(dst->runs, src->runs, sizeof(rle16_t) * n_runs); +roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, +const roaring_bitmap_t *x2) { +uint8_t result_type = 0; +const int length1 = x1->high_low_container.size, +length2 = x2->high_low_container.size; +if (0 == length1) { +roaring_bitmap_t *empty_bitmap = roaring_bitmap_create(); +roaring_bitmap_set_copy_on_write(empty_bitmap, is_cow(x1) || is_cow(x2)); +return empty_bitmap; +} +if (0 == length2) { +return roaring_bitmap_copy(x1); +} +roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1); +roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); + +int pos1 = 0, pos2 = 0; +uint8_t type1, type2; +uint16_t s1 = 0; +uint16_t s2 = 0; +while 
(true) { +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + +if (s1 == s2) { +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +container_t *c = container_andnot(c1, type1, c2, type2, +&result_type); + +if (container_nonzero_cardinality(c, result_type)) { +ra_append(&answer->high_low_container, s1, c, result_type); +} else { +container_free(c, result_type); +} +++pos1; +++pos2; +if (pos1 == length1) break; +if (pos2 == length2) break; +} else if (s1 < s2) { // s1 < s2 +const int next_pos1 = +ra_advance_until(&x1->high_low_container, s2, pos1); +ra_append_copy_range(&answer->high_low_container, +&x1->high_low_container, pos1, next_pos1, +is_cow(x1)); +// TODO : perhaps some of the copy_on_write should be based on +// answer rather than x1 (more stringent?). Many similar cases +pos1 = next_pos1; +if (pos1 == length1) break; +} else { // s1 > s2 +pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); +if (pos2 == length2) break; +} +} +if (pos2 == length2) { +ra_append_copy_range(&answer->high_low_container, +&x1->high_low_container, pos1, length1, +is_cow(x1)); +} +return answer; } -/* Compute the union of `src_1' and `src_2' and write the result to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. 
*/ -void run_container_union(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst) { - // TODO: this could be a lot more efficient - - // we start out with inexpensive checks - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - run_container_copy(src_1, dst); - return; - } - if (if2) { - run_container_copy(src_2, dst); - return; - } - } - const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; - if (dst->capacity < neededcapacity) - run_container_grow(dst, neededcapacity, false); - dst->n_runs = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - - rle16_t previousrle; - if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { - previousrle = run_container_append_first(dst, src_1->runs[rlepos]); - rlepos++; - } else { - previousrle = run_container_append_first(dst, src_2->runs[xrlepos]); - xrlepos++; - } +// inplace andnot (modifies its first argument). - while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) { - rle16_t newrl; - if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { - newrl = src_1->runs[rlepos]; - rlepos++; - } else { - newrl = src_2->runs[xrlepos]; - xrlepos++; - } - run_container_append(dst, newrl, &previousrle); - } - while (xrlepos < src_2->n_runs) { - run_container_append(dst, src_2->runs[xrlepos], &previousrle); - xrlepos++; - } - while (rlepos < src_1->n_runs) { - run_container_append(dst, src_1->runs[rlepos], &previousrle); - rlepos++; - } -} +void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, +const roaring_bitmap_t *x2) { +assert(x1 != x2); -/* Compute the union of `src_1' and `src_2' and write the result to `src_1' - */ -void run_container_union_inplace(run_container_t *src_1, - const run_container_t *src_2) { - // TODO: this could be a lot more efficient - - // we start out with inexpensive checks - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - 
if (if1 || if2) { - if (if1) { - return; - } - if (if2) { - run_container_copy(src_2, src_1); - return; - } - } - // we move the data to the end of the current array - const int32_t maxoutput = src_1->n_runs + src_2->n_runs; - const int32_t neededcapacity = maxoutput + src_1->n_runs; - if (src_1->capacity < neededcapacity) - run_container_grow(src_1, neededcapacity, true); - memmove(src_1->runs + maxoutput, src_1->runs, - src_1->n_runs * sizeof(rle16_t)); - rle16_t *inputsrc1 = src_1->runs + maxoutput; - const int32_t input1nruns = src_1->n_runs; - src_1->n_runs = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - - rle16_t previousrle; - if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { - previousrle = run_container_append_first(src_1, inputsrc1[rlepos]); - rlepos++; - } else { - previousrle = run_container_append_first(src_1, src_2->runs[xrlepos]); - xrlepos++; - } - while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) { - rle16_t newrl; - if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { - newrl = inputsrc1[rlepos]; - rlepos++; - } else { - newrl = src_2->runs[xrlepos]; - xrlepos++; - } - run_container_append(src_1, newrl, &previousrle); - } - while (xrlepos < src_2->n_runs) { - run_container_append(src_1, src_2->runs[xrlepos], &previousrle); - xrlepos++; - } - while (rlepos < input1nruns) { - run_container_append(src_1, inputsrc1[rlepos], &previousrle); - rlepos++; - } +uint8_t result_type = 0; +int length1 = x1->high_low_container.size; +const int length2 = x2->high_low_container.size; +int intersection_size = 0; + +if (0 == length2) return; + +if (0 == length1) { +roaring_bitmap_clear(x1); +return; } -/* Compute the symmetric difference of `src_1' and `src_2' and write the result - * to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. 
*/ -void run_container_xor(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst) { - // don't bother to convert xor with full range into negation - // since negation is implemented similarly - - const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; - if (dst->capacity < neededcapacity) - run_container_grow(dst, neededcapacity, false); - - int32_t pos1 = 0; - int32_t pos2 = 0; - dst->n_runs = 0; - - while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) { - if (src_1->runs[pos1].value <= src_2->runs[pos2].value) { - run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, - src_1->runs[pos1].length); - pos1++; - } else { - run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, - src_2->runs[pos2].length); - pos2++; - } - } - while (pos1 < src_1->n_runs) { - run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, - src_1->runs[pos1].length); - pos1++; - } +int pos1 = 0, pos2 = 0; +uint8_t type1, type2; +uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +while (true) { +if (s1 == s2) { +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); - while (pos2 < src_2->n_runs) { - run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, - src_2->runs[pos2].length); - pos2++; - } +// We do the computation "in place" only when c1 is not a shared container. +// Rationale: using a shared container safely with in place computation would +// require making a copy and then doing the computation in place which is likely +// less efficient than avoiding in place entirely and always generating a new +// container. 
+ +container_t *c; +if (type1 == SHARED_CONTAINER_TYPE) { +c = container_andnot(c1, type1, c2, type2, &result_type); +shared_container_free(CAST_shared(c1)); // release +} +else { +c = container_iandnot(c1, type1, c2, type2, &result_type); } -/* Compute the intersection of src_1 and src_2 and write the result to - * dst. It is assumed that dst is distinct from both src_1 and src_2. */ -void run_container_intersection(const run_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst) { - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - run_container_copy(src_2, dst); - return; - } - if (if2) { - run_container_copy(src_1, dst); - return; - } - } - // TODO: this could be a lot more efficient, could use SIMD optimizations - const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; - if (dst->capacity < neededcapacity) - run_container_grow(dst, neededcapacity, false); - dst->n_runs = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - int32_t start = src_1->runs[rlepos].value; - int32_t end = start + src_1->runs[rlepos].length + 1; - int32_t xstart = src_2->runs[xrlepos].value; - int32_t xend = xstart + src_2->runs[xrlepos].length + 1; - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { - if (end <= xstart) { - ++rlepos; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xend <= start) { - ++xrlepos; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else { // they overlap - const int32_t lateststart = start > xstart ? 
start : xstart; - int32_t earliestend; - if (end == xend) { // improbable - earliestend = end; - rlepos++; - xrlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else if (end < xend) { - earliestend = end; - rlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - - } else { // end > xend - earliestend = xend; - xrlepos++; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } - dst->runs[dst->n_runs].value = (uint16_t)lateststart; - dst->runs[dst->n_runs].length = - (uint16_t)(earliestend - lateststart - 1); - dst->n_runs++; - } - } +if (container_nonzero_cardinality(c, result_type)) { +ra_replace_key_and_container_at_index(&x1->high_low_container, +intersection_size++, s1, +c, result_type); +} else { +container_free(c, result_type); +} + +++pos1; +++pos2; +if (pos1 == length1) break; +if (pos2 == length2) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + +} else if (s1 < s2) { // s1 < s2 +if (pos1 != intersection_size) { +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); + +ra_replace_key_and_container_at_index(&x1->high_low_container, +intersection_size, s1, c1, +type1); } +intersection_size++; +pos1++; +if (pos1 == length1) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); -/* Compute the size of the intersection of src_1 and src_2 . 
*/ -int run_container_intersection_cardinality(const run_container_t *src_1, - const run_container_t *src_2) { - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - return run_container_cardinality(src_2); - } - if (if2) { - return run_container_cardinality(src_1); - } - } - int answer = 0; - int32_t rlepos = 0; - int32_t xrlepos = 0; - int32_t start = src_1->runs[rlepos].value; - int32_t end = start + src_1->runs[rlepos].length + 1; - int32_t xstart = src_2->runs[xrlepos].value; - int32_t xend = xstart + src_2->runs[xrlepos].length + 1; - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { - if (end <= xstart) { - ++rlepos; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xend <= start) { - ++xrlepos; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else { // they overlap - const int32_t lateststart = start > xstart ? 
start : xstart; - int32_t earliestend; - if (end == xend) { // improbable - earliestend = end; - rlepos++; - xrlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else if (end < xend) { - earliestend = end; - rlepos++; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - - } else { // end > xend - earliestend = xend; - xrlepos++; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } - answer += earliestend - lateststart; - } - } - return answer; +} else { // s1 > s2 +pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); +if (pos2 == length2) break; +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +} } -bool run_container_intersect(const run_container_t *src_1, - const run_container_t *src_2) { - const bool if1 = run_container_is_full(src_1); - const bool if2 = run_container_is_full(src_2); - if (if1 || if2) { - if (if1) { - return !run_container_empty(src_2); - } - if (if2) { - return !run_container_empty(src_1); - } - } - int32_t rlepos = 0; - int32_t xrlepos = 0; - int32_t start = src_1->runs[rlepos].value; - int32_t end = start + src_1->runs[rlepos].length + 1; - int32_t xstart = src_2->runs[xrlepos].value; - int32_t xend = xstart + src_2->runs[xrlepos].length + 1; - while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { - if (end <= xstart) { - ++rlepos; - if (rlepos < src_1->n_runs) { - start = src_1->runs[rlepos].value; - end = start + src_1->runs[rlepos].length + 1; - } - } else if (xend <= start) { - ++xrlepos; - if (xrlepos < src_2->n_runs) { - xstart = src_2->runs[xrlepos].value; - xend = xstart + src_2->runs[xrlepos].length + 1; - } - } else { // they overlap - return true; - } - } 
- return false; +if (pos1 < length1) { +// all containers between intersection_size and +// pos1 are junk. However, they have either been moved +// (thus still referenced) or involved in an iandnot +// that will clean up all containers that could not be reused. +// Thus we should not free the junk containers between +// intersection_size and pos1. +if (pos1 > intersection_size) { +// left slide of remaining items +ra_copy_range(&x1->high_low_container, pos1, length1, +intersection_size); +} +// else current placement is fine +intersection_size += (length1 - pos1); +} +ra_downsize(&x1->high_low_container, intersection_size); } +uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r) { +const roaring_array_t *ra = &r->high_low_container; -/* Compute the difference of src_1 and src_2 and write the result to - * dst. It is assumed that dst is distinct from both src_1 and src_2. */ -void run_container_andnot(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst) { - // following Java implementation as of June 2016 - - if (dst->capacity < src_1->n_runs + src_2->n_runs) - run_container_grow(dst, src_1->n_runs + src_2->n_runs, false); - - dst->n_runs = 0; - - int rlepos1 = 0; - int rlepos2 = 0; - int32_t start = src_1->runs[rlepos1].value; - int32_t end = start + src_1->runs[rlepos1].length + 1; - int32_t start2 = src_2->runs[rlepos2].value; - int32_t end2 = start2 + src_2->runs[rlepos2].length + 1; - - while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) { - if (end <= start2) { - // output the first run - dst->runs[dst->n_runs++] = - (rle16_t){.value = (uint16_t)start, - .length = (uint16_t)(end - start - 1)}; - rlepos1++; - if (rlepos1 < src_1->n_runs) { - start = src_1->runs[rlepos1].value; - end = start + src_1->runs[rlepos1].length + 1; - } - } else if (end2 <= start) { - // exit the second run - rlepos2++; - if (rlepos2 < src_2->n_runs) { - start2 = src_2->runs[rlepos2].value; - end2 = start2 + 
src_2->runs[rlepos2].length + 1; - } - } else { - if (start < start2) { - dst->runs[dst->n_runs++] = - (rle16_t){.value = (uint16_t)start, - .length = (uint16_t)(start2 - start - 1)}; - } - if (end2 < end) { - start = end2; - } else { - rlepos1++; - if (rlepos1 < src_1->n_runs) { - start = src_1->runs[rlepos1].value; - end = start + src_1->runs[rlepos1].length + 1; - } - } - } - } - if (rlepos1 < src_1->n_runs) { - dst->runs[dst->n_runs++] = (rle16_t){ - .value = (uint16_t)start, .length = (uint16_t)(end - start - 1)}; - rlepos1++; - if (rlepos1 < src_1->n_runs) { - memcpy(dst->runs + dst->n_runs, src_1->runs + rlepos1, - sizeof(rle16_t) * (src_1->n_runs - rlepos1)); - dst->n_runs += src_1->n_runs - rlepos1; - } - } +uint64_t card = 0; +for (int i = 0; i < ra->size; ++i) +card += container_get_cardinality(ra->containers[i], ra->typecodes[i]); +return card; } -int run_container_to_uint32_array(void *vout, const run_container_t *cont, - uint32_t base) { - int outpos = 0; - uint32_t *out = (uint32_t *)vout; - for (int i = 0; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - for (int j = 0; j <= le; ++j) { - uint32_t val = run_start + j; - memcpy(out + outpos, &val, - sizeof(uint32_t)); // should be compiled as a MOV on x64 - outpos++; - } - } - return outpos; -} +uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, +uint64_t range_start, +uint64_t range_end) { +const roaring_array_t *ra = &r->high_low_container; -/* - * Print this container using printf (useful for debugging). 
- */ -void run_container_printf(const run_container_t *cont) { - for (int i = 0; i < cont->n_runs; ++i) { - uint16_t run_start = cont->runs[i].value; - uint16_t le = cont->runs[i].length; - printf("[%d,%d]", run_start, run_start + le); - } +if (range_end > UINT32_MAX) { +range_end = UINT32_MAX + UINT64_C(1); } - -/* - * Print this container using printf as a comma-separated list of 32-bit - * integers starting at base. - */ -void run_container_printf_as_uint32_array(const run_container_t *cont, - uint32_t base) { - if (cont->n_runs == 0) return; - { - uint32_t run_start = base + cont->runs[0].value; - uint16_t le = cont->runs[0].length; - printf("%u", run_start); - for (uint32_t j = 1; j <= le; ++j) printf(",%u", run_start + j); - } - for (int32_t i = 1; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - for (uint32_t j = 0; j <= le; ++j) printf(",%u", run_start + j); - } +if (range_start >= range_end) { +return 0; } +range_end--; // make range_end inclusive +// now we have: 0 <= range_start <= range_end <= UINT32_MAX -int32_t run_container_serialize(const run_container_t *container, char *buf) { - int32_t l, off; +uint16_t minhb = range_start >> 16; +uint16_t maxhb = range_end >> 16; - memcpy(buf, &container->n_runs, off = sizeof(container->n_runs)); - memcpy(&buf[off], &container->capacity, sizeof(container->capacity)); - off += sizeof(container->capacity); +uint64_t card = 0; - l = sizeof(rle16_t) * container->n_runs; - memcpy(&buf[off], container->runs, l); - return (off + l); +int i = ra_get_index(ra, minhb); +if (i >= 0) { +if (minhb == maxhb) { +card += container_rank(ra->containers[i], ra->typecodes[i], +range_end & 0xffff); +} else { +card += container_get_cardinality(ra->containers[i], +ra->typecodes[i]); } - -int32_t run_container_write(const run_container_t *container, char *buf) { - memcpy(buf, &container->n_runs, sizeof(uint16_t)); - memcpy(buf + sizeof(uint16_t), container->runs, - 
container->n_runs * sizeof(rle16_t)); - return run_container_size_in_bytes(container); +if ((range_start & 0xffff) != 0) { +card -= container_rank(ra->containers[i], ra->typecodes[i], +(range_start & 0xffff) - 1); } - -int32_t run_container_read(int32_t cardinality, run_container_t *container, - const char *buf) { - (void)cardinality; - memcpy(&container->n_runs, buf, sizeof(uint16_t)); - if (container->n_runs > container->capacity) - run_container_grow(container, container->n_runs, false); - if(container->n_runs > 0) { - memcpy(container->runs, buf + sizeof(uint16_t), - container->n_runs * sizeof(rle16_t)); - } - return run_container_size_in_bytes(container); +i++; +} else { +i = -i - 1; } -uint32_t run_container_serialization_len(const run_container_t *container) { - return (sizeof(container->n_runs) + sizeof(container->capacity) + - sizeof(rle16_t) * container->n_runs); +for (; i < ra->size; i++) { +uint16_t key = ra->keys[i]; +if (key < maxhb) { +card += container_get_cardinality(ra->containers[i], +ra->typecodes[i]); +} else if (key == maxhb) { +card += container_rank(ra->containers[i], ra->typecodes[i], +range_end & 0xffff); +break; +} else { +break; +} } -void *run_container_deserialize(const char *buf, size_t buf_len) { - run_container_t *ptr; - - if (buf_len < 8 /* n_runs + capacity */) - return (NULL); - else - buf_len -= 8; +return card; +} - if ((ptr = (run_container_t *)malloc(sizeof(run_container_t))) != NULL) { - size_t len; - int32_t off; - memcpy(&ptr->n_runs, buf, off = 4); - memcpy(&ptr->capacity, &buf[off], 4); - off += 4; +bool roaring_bitmap_is_empty(const roaring_bitmap_t *r) { +return r->high_low_container.size == 0; +} - len = sizeof(rle16_t) * ptr->n_runs; +void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans) { +ra_to_uint32_array(&r->high_low_container, ans); +} - if (len != buf_len) { - free(ptr); - return (NULL); - } +bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, +size_t offset, size_t limit, 
+uint32_t *ans) { +return ra_range_uint32_array(&r->high_low_container, offset, limit, ans); +} - if ((ptr->runs = (rle16_t *)malloc(len)) == NULL) { - free(ptr); - return (NULL); - } +/** convert array and bitmap containers to run containers when it is more + * efficient; + * also convert from run containers when more space efficient. Returns + * true if the result has at least one run container. +*/ +bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) { +bool answer = false; +for (int i = 0; i < r->high_low_container.size; i++) { +uint8_t type_original, type_after; +ra_unshare_container_at_index( +&r->high_low_container, i); // TODO: this introduces extra cloning! +container_t *c = ra_get_container_at_index(&r->high_low_container, i, +&type_original); +container_t *c1 = convert_run_optimize(c, type_original, &type_after); +if (type_after == RUN_CONTAINER_TYPE) { +answer = true; +} +ra_set_container_at_index(&r->high_low_container, i, c1, type_after); +} +return answer; +} - memcpy(ptr->runs, &buf[off], len); +size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) { +size_t answer = 0; +for (int i = 0; i < r->high_low_container.size; i++) { +uint8_t type_original; +container_t *c = ra_get_container_at_index(&r->high_low_container, i, +&type_original); +answer += container_shrink_to_fit(c, type_original); +} +answer += ra_shrink_to_fit(&r->high_low_container); +return answer; +} - /* Check if returned values are monotonically increasing */ - for (int32_t i = 0, j = 0; i < ptr->n_runs; i++) { - if (ptr->runs[i].value < j) { - free(ptr->runs); - free(ptr); - return (NULL); - } else - j = ptr->runs[i].value; - } - } +/** + * Remove run-length encoding even when it is more space efficient + * return whether a change was applied + */ +bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) { +bool answer = false; +for (int i = 0; i < r->high_low_container.size; i++) { +uint8_t type_original, type_after; +container_t *c = 
ra_get_container_at_index(&r->high_low_container, i, +&type_original); +if (get_container_type(c, type_original) == RUN_CONTAINER_TYPE) { +answer = true; +if (type_original == SHARED_CONTAINER_TYPE) { +run_container_t *truec = CAST_run(CAST_shared(c)->container); +int32_t card = run_container_cardinality(truec); +container_t *c1 = convert_to_bitset_or_array_container( +truec, card, &type_after); +shared_container_free(CAST_shared(c)); // frees run as needed +ra_set_container_at_index(&r->high_low_container, i, c1, +type_after); - return (ptr); +} else { +int32_t card = run_container_cardinality(CAST_run(c)); +container_t *c1 = convert_to_bitset_or_array_container( +CAST_run(c), card, &type_after); +run_container_free(CAST_run(c)); +ra_set_container_at_index(&r->high_low_container, i, c1, +type_after); +} +} +} +return answer; } -bool run_container_iterate(const run_container_t *cont, uint32_t base, - roaring_iterator iterator, void *ptr) { - for (int i = 0; i < cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - - for (int j = 0; j <= le; ++j) - if (!iterator(run_start + j, ptr)) return false; - } - return true; +size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf) { +size_t portablesize = roaring_bitmap_portable_size_in_bytes(r); +uint64_t cardinality = roaring_bitmap_get_cardinality(r); +uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t); +if (portablesize < sizeasarray) { +buf[0] = CROARING_SERIALIZATION_CONTAINER; +return roaring_bitmap_portable_serialize(r, buf + 1) + 1; +} else { +buf[0] = CROARING_SERIALIZATION_ARRAY_UINT32; +memcpy(buf + 1, &cardinality, sizeof(uint32_t)); +roaring_bitmap_to_uint32_array( +r, (uint32_t *)(buf + 1 + sizeof(uint32_t))); +return 1 + (size_t)sizeasarray; +} } -bool run_container_iterate64(const run_container_t *cont, uint32_t base, - roaring_iterator64 iterator, uint64_t high_bits, - void *ptr) { - for (int i = 0; i < 
cont->n_runs; ++i) { - uint32_t run_start = base + cont->runs[i].value; - uint16_t le = cont->runs[i].length; - - for (int j = 0; j <= le; ++j) - if (!iterator(high_bits | (uint64_t)(run_start + j), ptr)) - return false; - } - return true; +size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r) { +size_t portablesize = roaring_bitmap_portable_size_in_bytes(r); +uint64_t sizeasarray = roaring_bitmap_get_cardinality(r) * sizeof(uint32_t) + +sizeof(uint32_t); +return portablesize < sizeasarray ? portablesize + 1 : (size_t)sizeasarray + 1; } -bool run_container_is_subset(const run_container_t *container1, - const run_container_t *container2) { - int i1 = 0, i2 = 0; - while (i1 < container1->n_runs && i2 < container2->n_runs) { - int start1 = container1->runs[i1].value; - int stop1 = start1 + container1->runs[i1].length; - int start2 = container2->runs[i2].value; - int stop2 = start2 + container2->runs[i2].length; - if (start1 < start2) { - return false; - } else { // start1 >= start2 - if (stop1 < stop2) { - i1++; - } else if (stop1 == stop2) { - i1++; - i2++; - } else { // stop1 > stop2 - i2++; - } - } - } - if (i1 == container1->n_runs) { - return true; - } else { - return false; - } +size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r) { +return ra_portable_size_in_bytes(&r->high_low_container); } -// TODO: write smart_append_exclusive version to match the overloaded 1 param -// Java version (or is it even used?) -// follows the Java implementation closely -// length is the rle-value. Ie, run [10,12) uses a length value 1. -void run_container_smart_append_exclusive(run_container_t *src, - const uint16_t start, - const uint16_t length) { - int old_end; - rle16_t *last_run = src->n_runs ? 
src->runs + (src->n_runs - 1) : NULL; - rle16_t *appended_last_run = src->runs + src->n_runs; - - if (!src->n_runs || - (start > (old_end = last_run->value + last_run->length + 1))) { - *appended_last_run = (rle16_t){.value = start, .length = length}; - src->n_runs++; - return; - } - if (old_end == start) { - // we merge - last_run->length += (length + 1); - return; - } - int new_end = start + length + 1; - - if (start == last_run->value) { - // wipe out previous - if (new_end < old_end) { - *last_run = (rle16_t){.value = (uint16_t)new_end, - .length = (uint16_t)(old_end - new_end - 1)}; - return; - } else if (new_end > old_end) { - *last_run = (rle16_t){.value = (uint16_t)old_end, - .length = (uint16_t)(new_end - old_end - 1)}; - return; - } else { - src->n_runs--; - return; - } - } - last_run->length = start - last_run->value - 1; - if (new_end < old_end) { - *appended_last_run = - (rle16_t){.value = (uint16_t)new_end, - .length = (uint16_t)(old_end - new_end - 1)}; - src->n_runs++; - } else if (new_end > old_end) { - *appended_last_run = - (rle16_t){.value = (uint16_t)old_end, - .length = (uint16_t)(new_end - old_end - 1)}; - src->n_runs++; - } +roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) { +roaring_bitmap_t *ans = +(roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t)); +if (ans == NULL) { +return NULL; +} +size_t bytesread; +bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread); +if (!is_ok) { +roaring_free(ans); +return NULL; +} +roaring_bitmap_set_copy_on_write(ans, false); +if (!is_ok) { +roaring_free(ans); +return NULL; +} +return ans; +} + +roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) { +return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX); } -bool run_container_select(const run_container_t *container, - uint32_t *start_rank, uint32_t rank, - uint32_t *element) { - for (int i = 0; i < container->n_runs; i++) { - uint16_t length = 
container->runs[i].length; - if (rank <= *start_rank + length) { - uint16_t value = container->runs[i].value; - *element = value + rank - (*start_rank); - return true; - } else - *start_rank += length + 1; - } - return false; + +size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes) { +return ra_portable_deserialize_size(buf, maxbytes); } -int run_container_rank(const run_container_t *container, uint16_t x) { - int sum = 0; - uint32_t x32 = x; - for (int i = 0; i < container->n_runs; i++) { - uint32_t startpoint = container->runs[i].value; - uint32_t length = container->runs[i].length; - uint32_t endpoint = length + startpoint; - if (x <= endpoint) { - if (x < startpoint) break; - return sum + (x32 - startpoint) + 1; - } else { - sum += length + 1; - } - } - return sum; + +size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, +char *buf) { +return ra_portable_serialize(&r->high_low_container, buf); } -/* end file src/containers/run.c */ -/* begin file src/roaring.c */ -#include -#include -#include -#include -#include -#include -extern inline bool roaring_bitmap_contains(const roaring_bitmap_t *r, - uint32_t val); -extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r); -extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow); +roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) { +const char *bufaschar = (const char *)buf; +if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) { +/* This looks like a compressed set of uint32_t elements */ +uint32_t card; -static inline bool is_cow(const roaring_bitmap_t *r) { - return r->high_low_container.flags & ROARING_FLAG_COW; +memcpy(&card, bufaschar + 1, sizeof(uint32_t)); + +const uint32_t *elems = +(const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); + +roaring_bitmap_t *bitmap = roaring_bitmap_create(); +if (bitmap == NULL) { +return NULL; } -static inline bool is_frozen(const roaring_bitmap_t *r) { - return 
r->high_low_container.flags & ROARING_FLAG_FROZEN; +roaring_bulk_context_t context = {0}; +for (uint32_t i = 0; i < card; i++) { +// elems may not be aligned, read with memcpy +uint32_t elem; +memcpy(&elem, elems + i, sizeof(elem)); +roaring_bitmap_add_bulk(bitmap, &context, elem); } +return bitmap; -// this is like roaring_bitmap_add, but it populates pointer arguments in such a -// way -// that we can recover the container touched, which, in turn can be used to -// accelerate some functions (when you repeatedly need to add to the same -// container) -static inline void *containerptr_roaring_bitmap_add(roaring_bitmap_t *r, - uint32_t val, - uint8_t *typecode, - int *index) { - uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, i); - void *container = - ra_get_container_at_index(&r->high_low_container, i, typecode); - uint8_t newtypecode = *typecode; - void *container2 = - container_add(container, val & 0xFFFF, *typecode, &newtypecode); - *index = i; - if (container2 != container) { - container_free(container, *typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - *typecode = newtypecode; - return container2; - } else { - return container; - } - } else { - array_container_t *newac = array_container_create(); - void *container = container_add(newac, val & 0xFFFF, - ARRAY_CONTAINER_TYPE_CODE, typecode); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, - container, *typecode); - *index = -i - 1; - return container; - } +} else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) { +return roaring_bitmap_portable_deserialize(bufaschar + 1); +} else +return (NULL); } -roaring_bitmap_t *roaring_bitmap_create() { - roaring_bitmap_t *ans = - (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); - if (!ans) { - return NULL; - } - 
ra_init(&ans->high_low_container); - return ans; +roaring_bitmap_t* roaring_bitmap_deserialize_safe(const void *buf, size_t maxbytes) { +if (maxbytes < 1) { +return NULL; } -roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) { - roaring_bitmap_t *ans = - (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); - if (!ans) { - return NULL; - } - bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap); - if (!is_ok) { - free(ans); - return NULL; - } - return ans; +const char *bufaschar = (const char *)buf; +if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) { +if (maxbytes < 1 + sizeof(uint32_t)) { +return NULL; } -void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, - const uint32_t *vals) { - void *container = NULL; // hold value of last container touched - uint8_t typecode = 0; // typecode of last container touched - uint32_t prev = 0; // previous valued inserted - size_t i = 0; // index of value - int containerindex = 0; - if (n_args == 0) return; - uint32_t val; - memcpy(&val, vals + i, sizeof(val)); - container = - containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex); - prev = val; - i++; - for (; i < n_args; i++) { - memcpy(&val, vals + i, sizeof(val)); - if (((prev ^ val) >> 16) == - 0) { // no need to seek the container, it is at hand - // because we already have the container at hand, we can do the - // insertion - // automatically, bypassing the roaring_bitmap_add call - uint8_t newtypecode = typecode; - void *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { // rare instance when we need to - // change the container type - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, - containerindex, container2, - newtypecode); - typecode = newtypecode; - container = container2; - } - } else { - container = containerptr_roaring_bitmap_add(r, val, &typecode, - &containerindex); - } - prev = val; - } -} +/* This 
looks like a compressed set of uint32_t elements */ +uint32_t card; +memcpy(&card, bufaschar + 1, sizeof(uint32_t)); -roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) { - roaring_bitmap_t *answer = roaring_bitmap_create(); - roaring_bitmap_add_many(answer, n_args, vals); - return answer; +// Check the buffer is big enough to contain card uint32_t elements +if (maxbytes < 1 + sizeof(uint32_t) + card * sizeof(uint32_t)) { +return NULL; } -roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) { - // todo: could be greatly optimized but we do not expect this call to ever - // include long lists - roaring_bitmap_t *answer = roaring_bitmap_create(); - va_list ap; - va_start(ap, n_args); - for (size_t i = 1; i <= n_args; i++) { - uint32_t val = va_arg(ap, uint32_t); - roaring_bitmap_add(answer, val); - } - va_end(ap); - return answer; -} +const uint32_t *elems = +(const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); -static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) { - return (a < b) ? a : b; +roaring_bitmap_t *bitmap = roaring_bitmap_create(); +if (bitmap == NULL) { +return NULL; } - -static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) { - return (a < b) ? 
a : b; +roaring_bulk_context_t context = {0}; +for (uint32_t i = 0; i < card; i++) { +// elems may not be aligned, read with memcpy +uint32_t elem; +memcpy(&elem, elems + i, sizeof(elem)); +roaring_bitmap_add_bulk(bitmap, &context, elem); } +return bitmap; -roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, - uint32_t step) { - if(max >= UINT64_C(0x100000000)) { - max = UINT64_C(0x100000000); - } - if (step == 0) return NULL; - if (max <= min) return NULL; - roaring_bitmap_t *answer = roaring_bitmap_create(); - if (step >= (1 << 16)) { - for (uint32_t value = (uint32_t)min; value < max; value += step) { - roaring_bitmap_add(answer, value); - } - return answer; - } - uint64_t min_tmp = min; - do { - uint32_t key = (uint32_t)min_tmp >> 16; - uint32_t container_min = min_tmp & 0xFFFF; - uint32_t container_max = (uint32_t)minimum_uint64(max - (key << 16), 1 << 16); - uint8_t type; - void *container = container_from_range(&type, container_min, - container_max, (uint16_t)step); - ra_append(&answer->high_low_container, key, container, type); - uint32_t gap = container_max - container_min + step - 1; - min_tmp += gap - (gap % step); - } while (min_tmp < max); - // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step - return answer; -} - -void roaring_bitmap_add_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) { - if (min > max) { - return; - } +} else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) { +return roaring_bitmap_portable_deserialize_safe(bufaschar + 1, maxbytes - 1); +} else +return (NULL); +} - uint32_t min_key = min >> 16; - uint32_t max_key = max >> 16; - - int32_t num_required_containers = max_key - min_key + 1; - int32_t suffix_length = count_greater(ra->high_low_container.keys, - ra->high_low_container.size, - max_key); - int32_t prefix_length = count_less(ra->high_low_container.keys, - ra->high_low_container.size - suffix_length, - min_key); - int32_t common_length = ra->high_low_container.size 
- prefix_length - suffix_length; - - if (num_required_containers > common_length) { - ra_shift_tail(&ra->high_low_container, suffix_length, - num_required_containers - common_length); - } +bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator, +void *ptr) { +const roaring_array_t *ra = &r->high_low_container; - int32_t src = prefix_length + common_length - 1; - int32_t dst = ra->high_low_container.size - suffix_length - 1; - for (uint32_t key = max_key; key != min_key-1; key--) { // beware of min_key==0 - uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0; - uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff; - void* new_container; - uint8_t new_type; - - if (src >= 0 && ra->high_low_container.keys[src] == key) { - ra_unshare_container_at_index(&ra->high_low_container, src); - new_container = container_add_range(ra->high_low_container.containers[src], - ra->high_low_container.typecodes[src], - container_min, container_max, &new_type); - if (new_container != ra->high_low_container.containers[src]) { - container_free(ra->high_low_container.containers[src], - ra->high_low_container.typecodes[src]); - } - src--; - } else { - new_container = container_from_range(&new_type, container_min, - container_max+1, 1); - } - ra_replace_key_and_container_at_index(&ra->high_low_container, dst, - key, new_container, new_type); - dst--; - } +for (int i = 0; i < ra->size; ++i) +if (!container_iterate(ra->containers[i], ra->typecodes[i], +((uint32_t)ra->keys[i]) << 16, +iterator, ptr)) { +return false; +} +return true; } -void roaring_bitmap_remove_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) { - if (min > max) { - return; - } +bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator, +uint64_t high_bits, void *ptr) { +const roaring_array_t *ra = &r->high_low_container; - uint32_t min_key = min >> 16; - uint32_t max_key = max >> 16; - - int32_t src = count_less(ra->high_low_container.keys, 
ra->high_low_container.size, min_key); - int32_t dst = src; - while (src < ra->high_low_container.size && ra->high_low_container.keys[src] <= max_key) { - uint32_t container_min = (min_key == ra->high_low_container.keys[src]) ? (min & 0xffff) : 0; - uint32_t container_max = (max_key == ra->high_low_container.keys[src]) ? (max & 0xffff) : 0xffff; - ra_unshare_container_at_index(&ra->high_low_container, src); - void *new_container; - uint8_t new_type; - new_container = container_remove_range(ra->high_low_container.containers[src], - ra->high_low_container.typecodes[src], - container_min, container_max, - &new_type); - if (new_container != ra->high_low_container.containers[src]) { - container_free(ra->high_low_container.containers[src], - ra->high_low_container.typecodes[src]); - } - if (new_container) { - ra_replace_key_and_container_at_index(&ra->high_low_container, dst, - ra->high_low_container.keys[src], - new_container, new_type); - dst++; - } - src++; - } - if (src > dst) { - ra_shift_tail(&ra->high_low_container, ra->high_low_container.size - src, dst - src); - } +for (int i = 0; i < ra->size; ++i) +if (!container_iterate64( +ra->containers[i], ra->typecodes[i], +((uint32_t)ra->keys[i]) << 16, iterator, +high_bits, ptr)) { +return false; +} +return true; } -extern inline void roaring_bitmap_add_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max); -extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max); +/**** +* begin roaring_uint32_iterator_t +*****/ -void roaring_bitmap_printf(const roaring_bitmap_t *ra) { - printf("{"); - for (int i = 0; i < ra->high_low_container.size; ++i) { - container_printf_as_uint32_array( - ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - ((uint32_t)ra->high_low_container.keys[i]) << 16); - if (i + 1 < ra->high_low_container.size) printf(","); - } - printf("}"); -} - -void roaring_bitmap_printf_describe(const roaring_bitmap_t *ra) { - printf("{"); - for (int i 
= 0; i < ra->high_low_container.size; ++i) { - printf("%d: %s (%d)", ra->high_low_container.keys[i], - get_full_container_name(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]), - container_get_cardinality(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i])); - if (ra->high_low_container.typecodes[i] == SHARED_CONTAINER_TYPE_CODE) { - printf( - "(shared count = %" PRIu32 " )", - ((shared_container_t *)(ra->high_low_container.containers[i])) - ->counter); - } - - if (i + 1 < ra->high_low_container.size) printf(", "); - } - printf("}"); +// Partially initializes the roaring iterator when it begins looking at +// a new container. +static bool iter_new_container_partial_init(roaring_uint32_iterator_t *newit) { +newit->in_container_index = 0; +newit->run_index = 0; +newit->current_value = 0; +if (newit->container_index >= newit->parent->high_low_container.size || +newit->container_index < 0) { +newit->current_value = UINT32_MAX; +return (newit->has_value = false); +} +// assume not empty +newit->has_value = true; +// we precompute container, typecode and highbits so that successive +// iterators do not have to grab them from odd memory locations +// and have to worry about the (easily predicted) container_unwrap_shared +// call. 
+newit->container = +newit->parent->high_low_container.containers[newit->container_index]; +newit->typecode = +newit->parent->high_low_container.typecodes[newit->container_index]; +newit->highbits = +((uint32_t) +newit->parent->high_low_container.keys[newit->container_index]) +<< 16; +newit->container = +container_unwrap_shared(newit->container, &(newit->typecode)); +return newit->has_value; } -typedef struct min_max_sum_s { - uint32_t min; - uint32_t max; - uint64_t sum; -} min_max_sum_t; +static bool loadfirstvalue(roaring_uint32_iterator_t *newit) { +if (!iter_new_container_partial_init(newit)) +return newit->has_value; -static bool min_max_sum_fnc(uint32_t value, void *param) { - min_max_sum_t *mms = (min_max_sum_t *)param; - if (value > mms->max) mms->max = value; - if (value < mms->min) mms->min = value; - mms->sum += value; - return true; // we always process all data points -} +switch (newit->typecode) { +case BITSET_CONTAINER_TYPE: { +const bitset_container_t *bc = const_CAST_bitset(newit->container); -/** -* (For advanced users.) 
-* Collect statistics about the bitmap -*/ -void roaring_bitmap_statistics(const roaring_bitmap_t *ra, - roaring_statistics_t *stat) { - memset(stat, 0, sizeof(*stat)); - stat->n_containers = ra->high_low_container.size; - stat->cardinality = roaring_bitmap_get_cardinality(ra); - min_max_sum_t mms; - mms.min = UINT32_C(0xFFFFFFFF); - mms.max = UINT32_C(0); - mms.sum = 0; - roaring_iterate(ra, &min_max_sum_fnc, &mms); - stat->min_value = mms.min; - stat->max_value = mms.max; - stat->sum_value = mms.sum; - - for (int i = 0; i < ra->high_low_container.size; ++i) { - uint8_t truetype = - get_container_type(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - uint32_t card = - container_get_cardinality(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - uint32_t sbytes = - container_size_in_bytes(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - switch (truetype) { - case BITSET_CONTAINER_TYPE_CODE: - stat->n_bitset_containers++; - stat->n_values_bitset_containers += card; - stat->n_bytes_bitset_containers += sbytes; - break; - case ARRAY_CONTAINER_TYPE_CODE: - stat->n_array_containers++; - stat->n_values_array_containers += card; - stat->n_bytes_array_containers += sbytes; - break; - case RUN_CONTAINER_TYPE_CODE: - stat->n_run_containers++; - stat->n_values_run_containers += card; - stat->n_bytes_run_containers += sbytes; - break; - default: - assert(false); - __builtin_unreachable(); - } - } +uint32_t wordindex = 0; +uint64_t word; +while ((word = bc->words[wordindex]) == 0) { +wordindex++; // advance } +// here "word" is non-zero +newit->in_container_index = wordindex * 64 + roaring_trailing_zeroes(word); +newit->current_value = newit->highbits | newit->in_container_index; +break; } -roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) { - roaring_bitmap_t *ans = - (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); - if (!ans) { - return NULL; - } - bool is_ok = 
ra_copy(&r->high_low_container, &ans->high_low_container, - is_cow(r)); - if (!is_ok) { - free(ans); - return NULL; - } - roaring_bitmap_set_copy_on_write(ans, is_cow(r)); - return ans; -} +case ARRAY_CONTAINER_TYPE: { +const array_container_t *ac = const_CAST_array(newit->container); +newit->current_value = newit->highbits | ac->array[0]; +break; } -bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, - const roaring_bitmap_t *src) { - return ra_overwrite(&src->high_low_container, &dest->high_low_container, - is_cow(src)); -} +case RUN_CONTAINER_TYPE: { +const run_container_t *rc = const_CAST_run(newit->container); +newit->current_value = newit->highbits | rc->runs[0].value; +break; } -void roaring_bitmap_free(const roaring_bitmap_t *r) { - if (!is_frozen(r)) { - ra_clear((roaring_array_t*)&r->high_low_container); - } - free((roaring_bitmap_t*)r); +default: +// if this ever happens, bug! +assert(false); +} // switch (typecode) +return true; } -void roaring_bitmap_clear(roaring_bitmap_t *r) { - ra_reset(&r->high_low_container); +static bool loadlastvalue(roaring_uint32_iterator_t* newit) { +if (!iter_new_container_partial_init(newit)) +return newit->has_value; + +switch(newit->typecode) { +case BITSET_CONTAINER_TYPE: { +uint32_t wordindex = BITSET_CONTAINER_SIZE_IN_WORDS - 1; +uint64_t word; +const bitset_container_t* bitset_container = (const bitset_container_t*)newit->container; +while ((word = bitset_container->words[wordindex]) == 0) +--wordindex; + +int num_leading_zeros = roaring_leading_zeroes(word); +newit->in_container_index = (wordindex * 64) + (63 - num_leading_zeros); +newit->current_value = newit->highbits | newit->in_container_index; +break; +} +case ARRAY_CONTAINER_TYPE: { +const array_container_t* array_container = (const array_container_t*)newit->container; +newit->in_container_index = array_container->cardinality - 1; +newit->current_value = newit->highbits | array_container->array[newit->in_container_index]; +break; +} +case RUN_CONTAINER_TYPE: 
{ +const run_container_t* run_container = (const run_container_t*)newit->container; +newit->run_index = run_container->n_runs - 1; +const rle16_t* last_run = &run_container->runs[newit->run_index]; +newit->current_value = newit->highbits | (last_run->value + last_run->length); +break; +} +default: +// if this ever happens, bug! +assert(false); +} +return true; } -void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, i); - void *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); - uint8_t newtypecode = typecode; - void *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } - } else { - array_container_t *newac = array_container_create(); - void *container = container_add(newac, val & 0xFFFF, - ARRAY_CONTAINER_TYPE_CODE, &typecode); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, - container, typecode); - } -} +// prerequesite: the value should be in range of the container +static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) { +// Don't have to check return value because of prerequisite +iter_new_container_partial_init(newit); +uint16_t lb = val & 0xFFFF; -bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - bool result = false; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, i); - void *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); - - const int oldCardinality = - 
container_get_cardinality(container, typecode); - - uint8_t newtypecode = typecode; - void *container2 = - container_add(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - result = true; - } else { - const int newCardinality = - container_get_cardinality(container, newtypecode); - - result = oldCardinality != newCardinality; - } - } else { - array_container_t *newac = array_container_create(); - void *container = container_add(newac, val & 0xFFFF, - ARRAY_CONTAINER_TYPE_CODE, &typecode); - // we could just assume that it stays an array container - ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, - container, typecode); - result = true; - } +switch (newit->typecode) { +case BITSET_CONTAINER_TYPE: { +const bitset_container_t *bc = const_CAST_bitset(newit->container); +newit->in_container_index = +bitset_container_index_equalorlarger(bc, lb); +newit->current_value = newit->highbits | newit->in_container_index; +break; } + +case ARRAY_CONTAINER_TYPE: { +const array_container_t *ac = const_CAST_array(newit->container); +newit->in_container_index = +array_container_index_equalorlarger(ac, lb); +newit->current_value = +newit->highbits | ac->array[newit->in_container_index]; +break; } - return result; +case RUN_CONTAINER_TYPE: { +const run_container_t *rc = const_CAST_run(newit->container); +newit->run_index = run_container_index_equalorlarger(rc, lb); +if (rc->runs[newit->run_index].value <= lb) { +newit->current_value = val; +} else { +newit->current_value = +newit->highbits | rc->runs[newit->run_index].value; } +break; } -void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, i); - void *container = - 
ra_get_container_at_index(&r->high_low_container, i, &typecode); - uint8_t newtypecode = typecode; - void *container2 = - container_remove(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } - if (container_get_cardinality(container2, newtypecode) != 0) { - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } else { - ra_remove_at_index_and_free(&r->high_low_container, i); - } - } +default: +roaring_unreachable; } -bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - const int i = ra_get_index(&r->high_low_container, hb); - uint8_t typecode; - bool result = false; - if (i >= 0) { - ra_unshare_container_at_index(&r->high_low_container, i); - void *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); - - const int oldCardinality = - container_get_cardinality(container, typecode); - - uint8_t newtypecode = typecode; - void *container2 = - container_remove(container, val & 0xFFFF, typecode, &newtypecode); - if (container2 != container) { - container_free(container, typecode); - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } - - const int newCardinality = - container_get_cardinality(container2, newtypecode); - - if (newCardinality != 0) { - ra_set_container_at_index(&r->high_low_container, i, container2, - newtypecode); - } else { - ra_remove_at_index_and_free(&r->high_low_container, i); - } - - result = oldCardinality != newCardinality; - } - return result; +return true; } -void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, - const uint32_t *vals) { - if (n_args == 0 || r->high_low_container.size == 0) { - return; - } - int32_t pos = -1; // position of the container used in the previous iteration - for (size_t i = 0; i < n_args; i++) { - uint16_t key 
= (uint16_t)(vals[i] >> 16); - if (pos < 0 || key != r->high_low_container.keys[pos]) { - pos = ra_get_index(&r->high_low_container, key); - } - if (pos >= 0) { - uint8_t new_typecode; - void *new_container; - new_container = container_remove(r->high_low_container.containers[pos], - vals[i] & 0xffff, - r->high_low_container.typecodes[pos], - &new_typecode); - if (new_container != r->high_low_container.containers[pos]) { - container_free(r->high_low_container.containers[pos], - r->high_low_container.typecodes[pos]); - ra_replace_key_and_container_at_index(&r->high_low_container, - pos, key, new_container, - new_typecode); - } - if (!container_nonzero_cardinality(new_container, new_typecode)) { - container_free(new_container, new_typecode); - ra_remove_at_index(&r->high_low_container, pos); - pos = -1; - } - } - } +void roaring_init_iterator(const roaring_bitmap_t *r, +roaring_uint32_iterator_t *newit) { +newit->parent = r; +newit->container_index = 0; +newit->has_value = loadfirstvalue(newit); } -// there should be some SIMD optimizations possible here -roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint32_t neededcap = length1 > length2 ? 
length2 : length1; - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2)); - - int pos1 = 0, pos2 = 0; - - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t container_type_1, container_type_2; - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = container_and(c1, container_type_1, c2, container_type_2, - &container_result_type); - if (container_nonzero_cardinality(c, container_result_type)) { - ra_append(&answer->high_low_container, s1, c, - container_result_type); - } else { - container_free( - c, container_result_type); // otherwise:memory leak! - } - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - } - } - return answer; +void roaring_init_iterator_last(const roaring_bitmap_t *r, +roaring_uint32_iterator_t *newit) { +newit->parent = r; +newit->container_index = newit->parent->high_low_container.size - 1; +newit->has_value = loadlastvalue(newit); } -/** - * Compute the union of 'number' bitmaps. 
- */ -roaring_bitmap_t *roaring_bitmap_or_many(size_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); - } - if (number == 1) { - return roaring_bitmap_copy(x[0]); - } - roaring_bitmap_t *answer = - roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION); - for (size_t i = 2; i < number; i++) { - roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION); - } - roaring_bitmap_repair_after_lazy(answer); - return answer; +roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r) { +roaring_uint32_iterator_t *newit = +(roaring_uint32_iterator_t *)roaring_malloc(sizeof(roaring_uint32_iterator_t)); +if (newit == NULL) return NULL; +roaring_init_iterator(r, newit); +return newit; } -/** - * Compute the xor of 'number' bitmaps. - */ -roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); - } - if (number == 1) { - return roaring_bitmap_copy(x[0]); - } - roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]); - for (size_t i = 2; i < number; i++) { - roaring_bitmap_lazy_xor_inplace(answer, x[i]); - } - roaring_bitmap_repair_after_lazy(answer); - return answer; +roaring_uint32_iterator_t *roaring_copy_uint32_iterator( +const roaring_uint32_iterator_t *it) { +roaring_uint32_iterator_t *newit = +(roaring_uint32_iterator_t *)roaring_malloc(sizeof(roaring_uint32_iterator_t)); +memcpy(newit, it, sizeof(roaring_uint32_iterator_t)); +return newit; } -// inplace and (modifies its first argument). -void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - if (x1 == x2) return; - int pos1 = 0, pos2 = 0, intersection_size = 0; - const int length1 = ra_get_size(&x1->high_low_container); - const int length2 = ra_get_size(&x2->high_low_container); - - // any skipped-over or newly emptied containers in x1 - // have to be freed. 
- while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t typecode1, typecode2, typecode_result; - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &typecode1); - c1 = get_writable_copy_if_shared(c1, &typecode1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &typecode2); - void *c = - container_iand(c1, typecode1, c2, typecode2, &typecode_result); - if (c != c1) { // in this instance a new container was created, and - // we need to free the old one - container_free(c1, typecode1); - } - if (container_nonzero_cardinality(c, typecode_result)) { - ra_replace_key_and_container_at_index(&x1->high_low_container, - intersection_size, s1, c, - typecode_result); - intersection_size++; - } else { - container_free(c, typecode_result); - } - ++pos1; - ++pos2; - } else if (s1 < s2) { - pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - } - } +bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) { +uint16_t hb = val >> 16; +const int i = ra_get_index(& it->parent->high_low_container, hb); +if (i >= 0) { +uint32_t lowvalue = container_maximum(it->parent->high_low_container.containers[i], it->parent->high_low_container.typecodes[i]); +uint16_t lb = val & 0xFFFF; +if(lowvalue < lb ) { +it->container_index = i+1; // will have to load first value of next container +} else {// the value is necessarily within the range of the container +it->container_index = i; +it->has_value = loadfirstvalue_largeorequal(it, val); +return it->has_value; +} +} else { +// there is no matching, so we are going for the next container +it->container_index = -i-1; +} +it->has_value = loadfirstvalue(it); +return it->has_value; +} - // if we ended early because 
x2 ran out, then all remaining in x1 should be - // freed - while (pos1 < length1) { - container_free(x1->high_low_container.containers[pos1], - x1->high_low_container.typecodes[pos1]); - ++pos1; - } - // all containers after this have either been copied or freed - ra_downsize(&x1->high_low_container, intersection_size); +bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) { +if (it->container_index >= it->parent->high_low_container.size) { +return (it->has_value = false); } - -roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2)); - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = container_or(c1, container_type_1, c2, container_type_2, - &container_result_type); - // since we assume that the initial containers are non-empty, the - // result here - // can only be non-empty - ra_append(&answer->high_low_container, s1, c, - container_result_type); - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - void *c1 = 
ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - // c1 = container_clone(c1, container_type_1); - c1 = - get_copy_of_container(c1, &container_type_1, is_cow(x1)); - if (is_cow(x1)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - container_type_1); - } - ra_append(&answer->high_low_container, s1, c1, container_type_1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - // c2 = container_clone(c2, container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - ra_append(&answer->high_low_container, s2, c2, container_type_2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - is_cow(x2)); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); - } - return answer; +if (it->container_index < 0) { +it->container_index = 0; +return (it->has_value = loadfirstvalue(it)); } -// inplace or (modifies its first argument). 
-void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; +switch (it->typecode) { +case BITSET_CONTAINER_TYPE: { +const bitset_container_t *bc = const_CAST_bitset(it->container); +it->in_container_index++; - if (0 == length2) return; +uint32_t wordindex = it->in_container_index / 64; +if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break; - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - if (!container_is_full(c1, container_type_1)) { - c1 = get_writable_copy_if_shared(c1, &container_type_1); - - void *c2 = ra_get_container_at_index(&x2->high_low_container, - pos2, &container_type_2); - void *c = - container_ior(c1, container_type_1, c2, container_type_2, - &container_result_type); - if (c != - c1) { // in this instance a new container was created, and - // we need to free the old one - container_free(c1, container_type_1); - } - - ra_set_container_at_index(&x1->high_low_container, pos1, c, - container_result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, is_cow(x2)); - if (is_cow(x2)) { - 
ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - - // void *c2_clone = container_clone(c2, container_type_2); - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - container_type_2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, is_cow(x2)); - } +uint64_t word = bc->words[wordindex] & +(UINT64_MAX << (it->in_container_index % 64)); +// next part could be optimized/simplified +while ((word == 0) && +(wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) { +wordindex++; +word = bc->words[wordindex]; +} +if (word != 0) { +it->in_container_index = wordindex * 64 + roaring_trailing_zeroes(word); +it->current_value = it->highbits | it->in_container_index; +return (it->has_value = true); } +break; } -roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2)); - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = container_xor(c1, container_type_1, c2, container_type_2, - &container_result_type); - - if 
(container_nonzero_cardinality(c, container_result_type)) { - ra_append(&answer->high_low_container, s1, c, - container_result_type); - } else { - container_free(c, container_result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = - get_copy_of_container(c1, &container_type_1, is_cow(x1)); - if (is_cow(x1)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - container_type_1); - } - ra_append(&answer->high_low_container, s1, c1, container_type_1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - ra_append(&answer->high_low_container, s2, c2, container_type_2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - is_cow(x2)); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); - } - return answer; +case ARRAY_CONTAINER_TYPE: { +const array_container_t *ac = const_CAST_array(it->container); +it->in_container_index++; +if (it->in_container_index < ac->cardinality) { +it->current_value = +it->highbits | ac->array[it->in_container_index]; +return (it->has_value = true); } +break; } -// inplace xor (modifies its first argument). 
+case RUN_CONTAINER_TYPE: { +if(it->current_value == UINT32_MAX) { // avoid overflow to zero +return (it->has_value = false); +} -void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); - uint8_t container_result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; +const run_container_t* rc = const_CAST_run(it->container); +uint32_t limit = (it->highbits | (rc->runs[it->run_index].value + +rc->runs[it->run_index].length)); +if (++it->current_value <= limit) { +return (it->has_value = true); +} - if (0 == length2) return; +if (++it->run_index < rc->n_runs) { // Assume the run has a value +it->current_value = +it->highbits | rc->runs[it->run_index].value; +return (it->has_value = true); +} +break; +} - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } +default: +roaring_unreachable; +} - // XOR can have new containers inserted from x2, but can also - // lose containers when x1 and x2 are nonempty and identical. 
- - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = get_writable_copy_if_shared(c1, &container_type_1); - - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = container_ixor(c1, container_type_1, c2, container_type_2, - &container_result_type); - - if (container_nonzero_cardinality(c, container_result_type)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c, - container_result_type); - ++pos1; - } else { - container_free(c, container_result_type); - ra_remove_at_index(&x1->high_low_container, pos1); - --length1; - } - - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - container_type_2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, is_cow(x2)); - } +// moving to next container +it->container_index++; +return (it->has_value = loadfirstvalue(it)); } -roaring_bitmap_t *roaring_bitmap_andnot(const 
roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - roaring_bitmap_t *empty_bitmap = roaring_bitmap_create(); - roaring_bitmap_set_copy_on_write(empty_bitmap, is_cow(x1) && is_cow(x2)); - return empty_bitmap; - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2)); - - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = 0; - uint16_t s2 = 0; - while (true) { - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = - container_andnot(c1, container_type_1, c2, container_type_2, - &container_result_type); - - if (container_nonzero_cardinality(c, container_result_type)) { - ra_append(&answer->high_low_container, s1, c, - container_result_type); - } else { - container_free(c, container_result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - } else if (s1 < s2) { // s1 < s2 - const int next_pos1 = - ra_advance_until(&x1->high_low_container, s2, pos1); - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, next_pos1, - is_cow(x1)); - // TODO : perhaps some of the copy_on_write should be based on - // answer rather than x1 (more stringent?). 
Many similar cases - pos1 = next_pos1; - if (pos1 == length1) break; - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - if (pos2 == length2) break; - } - } - if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); - } - return answer; +bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it) { +if (it->container_index < 0) { +return (it->has_value = false); +} +if (it->container_index >= it->parent->high_low_container.size) { +it->container_index = it->parent->high_low_container.size - 1; +return (it->has_value = loadlastvalue(it)); } -// inplace andnot (modifies its first argument). +switch (it->typecode) { +case BITSET_CONTAINER_TYPE: { +if (--it->in_container_index < 0) +break; -void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); +const bitset_container_t* bitset_container = (const bitset_container_t*)it->container; +int32_t wordindex = it->in_container_index / 64; +uint64_t word = bitset_container->words[wordindex] & (UINT64_MAX >> (63 - (it->in_container_index % 64))); - uint8_t container_result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - int intersection_size = 0; +while (word == 0 && --wordindex >= 0) { +word = bitset_container->words[wordindex]; +} +if (word == 0) +break; - if (0 == length2) return; +int num_leading_zeros = roaring_leading_zeroes(word); +it->in_container_index = (wordindex * 64) + (63 - num_leading_zeros); +it->current_value = it->highbits | it->in_container_index; +return (it->has_value = true); +} +case ARRAY_CONTAINER_TYPE: { +if (--it->in_container_index < 0) +break; - if (0 == length1) { - roaring_bitmap_clear(x1); - return; - } +const array_container_t* array_container = (const array_container_t*)it->container; +it->current_value = it->highbits | array_container->array[it->in_container_index]; 
+return (it->has_value = true); +} +case RUN_CONTAINER_TYPE: { +if(it->current_value == 0) +return (it->has_value = false); - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = get_writable_copy_if_shared(c1, &container_type_1); - - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = - container_iandnot(c1, container_type_1, c2, container_type_2, - &container_result_type); - - if (container_nonzero_cardinality(c, container_result_type)) { - ra_replace_key_and_container_at_index(&x1->high_low_container, - intersection_size++, s1, - c, container_result_type); - } else { - container_free(c, container_result_type); - } - - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - if (pos1 != intersection_size) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, - pos1, &container_type_1); - - ra_replace_key_and_container_at_index(&x1->high_low_container, - intersection_size, s1, c1, - container_type_1); - } - intersection_size++; - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } +const run_container_t* run_container = (const run_container_t*)it->container; +if (--it->current_value >= (it->highbits | run_container->runs[it->run_index].value)) { +return (it->has_value = true); +} - if (pos1 < length1) { - // all containers 
between intersection_size and - // pos1 are junk. However, they have either been moved - // (thus still referenced) or involved in an iandnot - // that will clean up all containers that could not be reused. - // Thus we should not free the junk containers between - // intersection_size and pos1. - if (pos1 > intersection_size) { - // left slide of remaining items - ra_copy_range(&x1->high_low_container, pos1, length1, - intersection_size); - } - // else current placement is fine - intersection_size += (length1 - pos1); - } - ra_downsize(&x1->high_low_container, intersection_size); +if (--it->run_index < 0) +break; + +it->current_value = it->highbits | (run_container->runs[it->run_index].value + +run_container->runs[it->run_index].length); +return (it->has_value = true); } +default: +// if this ever happens, bug! +assert(false); +} // switch (typecode) -uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *ra) { - uint64_t card = 0; - for (int i = 0; i < ra->high_low_container.size; ++i) - card += container_get_cardinality(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - return card; +// moving to previous container +it->container_index--; +return (it->has_value = loadlastvalue(it)); } -uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *ra, - uint64_t range_start, - uint64_t range_end) { - if (range_end > UINT32_MAX) { - range_end = UINT32_MAX + UINT64_C(1); - } - if (range_start >= range_end) { - return 0; - } - range_end--; // make range_end inclusive - // now we have: 0 <= range_start <= range_end <= UINT32_MAX - - uint16_t minhb = range_start >> 16; - uint16_t maxhb = range_end >> 16; - - uint64_t card = 0; - - int i = ra_get_index(&ra->high_low_container, minhb); - if (i >= 0) { - if (minhb == maxhb) { - card += container_rank(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - range_end & 0xffff); - } else { - card += 
container_get_cardinality(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - } - if ((range_start & 0xffff) != 0) { - card -= container_rank(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - (range_start & 0xffff) - 1); - } - i++; - } else { - i = -i - 1; - } +uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) { +uint32_t ret = 0; +uint32_t num_values; +uint32_t wordindex; // used for bitsets +uint64_t word; // used for bitsets +const array_container_t* acont; //TODO remove +const run_container_t* rcont; //TODO remove +const bitset_container_t* bcont; //TODO remove + +while (it->has_value && ret < count) { +switch (it->typecode) { +case BITSET_CONTAINER_TYPE: +bcont = const_CAST_bitset(it->container); +wordindex = it->in_container_index / 64; +word = bcont->words[wordindex] & (UINT64_MAX << (it->in_container_index % 64)); +do { +while (word != 0 && ret < count) { +buf[0] = it->highbits | (wordindex * 64 + roaring_trailing_zeroes(word)); +word = word & (word - 1); +buf++; +ret++; +} +while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) { +wordindex++; +word = bcont->words[wordindex]; +} +} while (word != 0 && ret < count); +it->has_value = (word != 0); +if (it->has_value) { +it->in_container_index = wordindex * 64 + roaring_trailing_zeroes(word); +it->current_value = it->highbits | it->in_container_index; +} +break; +case ARRAY_CONTAINER_TYPE: +acont = const_CAST_array(it->container); +num_values = minimum_uint32(acont->cardinality - it->in_container_index, count - ret); +for (uint32_t i = 0; i < num_values; i++) { +buf[i] = it->highbits | acont->array[it->in_container_index + i]; +} +buf += num_values; +ret += num_values; +it->in_container_index += num_values; +it->has_value = (it->in_container_index < acont->cardinality); +if (it->has_value) { +it->current_value = it->highbits | acont->array[it->in_container_index]; +} +break; +case 
RUN_CONTAINER_TYPE: +rcont = const_CAST_run(it->container); +//"in_run_index" name is misleading, read it as "max_value_in_current_run" +do { +uint32_t largest_run_value = it->highbits | (rcont->runs[it->run_index].value + rcont->runs[it->run_index].length); +num_values = minimum_uint32(largest_run_value - it->current_value + 1, count - ret); +for (uint32_t i = 0; i < num_values; i++) { +buf[i] = it->current_value + i; +} +it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0 +buf += num_values; +ret += num_values; + +if (it->current_value > largest_run_value || it->current_value == 0) { +it->run_index++; +if (it->run_index < rcont->n_runs) { +it->current_value = it->highbits | rcont->runs[it->run_index].value; +} else { +it->has_value = false; +} +} +} while ((ret < count) && it->has_value); +break; +default: +assert(false); +} +if (it->has_value) { +assert(ret == count); +return ret; +} +it->container_index++; +it->has_value = loadfirstvalue(it); +} +return ret; +} + + + +void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { roaring_free(it); } - for (; i < ra->high_low_container.size; i++) { - uint16_t key = ra->high_low_container.keys[i]; - if (key < maxhb) { - card += container_get_cardinality(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i]); - } else if (key == maxhb) { - card += container_rank(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - range_end & 0xffff); - break; - } else { - break; - } - } +/**** +* end of roaring_uint32_iterator_t +*****/ - return card; +bool roaring_bitmap_equals(const roaring_bitmap_t *r1, +const roaring_bitmap_t *r2) { +const roaring_array_t *ra1 = &r1->high_low_container; +const roaring_array_t *ra2 = &r2->high_low_container; + +if (ra1->size != ra2->size) { +return false; +} +for (int i = 0; i < ra1->size; ++i) { +if (ra1->keys[i] != ra2->keys[i]) { +return false; +} +} +for (int i = 0; i < ra1->size; ++i) { +bool areequal = 
container_equals(ra1->containers[i], +ra1->typecodes[i], +ra2->containers[i], +ra2->typecodes[i]); +if (!areequal) { +return false; +} +} +return true; } +bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1, +const roaring_bitmap_t *r2) { +const roaring_array_t *ra1 = &r1->high_low_container; +const roaring_array_t *ra2 = &r2->high_low_container; -bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra) { - return ra->high_low_container.size == 0; -} +const int length1 = ra1->size, +length2 = ra2->size; -void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *ra, uint32_t *ans) { - ra_to_uint32_array(&ra->high_low_container, ans); -} +int pos1 = 0, pos2 = 0; -bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *ra, size_t offset, size_t limit, uint32_t *ans) { - return ra_range_uint32_array(&ra->high_low_container, offset, limit, ans); -} +while (pos1 < length1 && pos2 < length2) { +const uint16_t s1 = ra_get_key_at_index(ra1, pos1); +const uint16_t s2 = ra_get_key_at_index(ra2, pos2); -/** convert array and bitmap containers to run containers when it is more - * efficient; - * also convert from run containers when more space efficient. Returns - * true if the result has at least one run container. -*/ -bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) { - bool answer = false; - for (int i = 0; i < r->high_low_container.size; i++) { - uint8_t typecode_original, typecode_after; - ra_unshare_container_at_index( - &r->high_low_container, i); // TODO: this introduces extra cloning! 
- void *c = ra_get_container_at_index(&r->high_low_container, i, - &typecode_original); - void *c1 = convert_run_optimize(c, typecode_original, &typecode_after); - if (typecode_after == RUN_CONTAINER_TYPE_CODE) answer = true; - ra_set_container_at_index(&r->high_low_container, i, c1, - typecode_after); - } - return answer; +if (s1 == s2) { +uint8_t type1, type2; +container_t *c1 = ra_get_container_at_index(ra1, pos1, &type1); +container_t *c2 = ra_get_container_at_index(ra2, pos2, &type2); +if (!container_is_subset(c1, type1, c2, type2)) +return false; +++pos1; +++pos2; +} else if (s1 < s2) { // s1 < s2 +return false; +} else { // s1 > s2 +pos2 = ra_advance_until(ra2, s1, pos2); } - -size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) { - size_t answer = 0; - for (int i = 0; i < r->high_low_container.size; i++) { - uint8_t typecode_original; - void *c = ra_get_container_at_index(&r->high_low_container, i, - &typecode_original); - answer += container_shrink_to_fit(c, typecode_original); - } - answer += ra_shrink_to_fit(&r->high_low_container); - return answer; +} +if (pos1 == length1) +return true; +else +return false; } -/** - * Remove run-length encoding even when it is more space efficient - * return whether a change was applied - */ -bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) { - bool answer = false; - for (int i = 0; i < r->high_low_container.size; i++) { - uint8_t typecode_original, typecode_after; - void *c = ra_get_container_at_index(&r->high_low_container, i, - &typecode_original); - if (get_container_type(c, typecode_original) == - RUN_CONTAINER_TYPE_CODE) { - answer = true; - if (typecode_original == SHARED_CONTAINER_TYPE_CODE) { - run_container_t *truec = - (run_container_t *)((shared_container_t *)c)->container; - int32_t card = run_container_cardinality(truec); - void *c1 = convert_to_bitset_or_array_container( - truec, card, &typecode_after); - shared_container_free((shared_container_t *)c);// will free the run container as 
needed - ra_set_container_at_index(&r->high_low_container, i, c1, - typecode_after); - - } else { - int32_t card = run_container_cardinality((run_container_t *)c); - void *c1 = convert_to_bitset_or_array_container( - (run_container_t *)c, card, &typecode_after); - run_container_free((run_container_t *)c); - ra_set_container_at_index(&r->high_low_container, i, c1, - typecode_after); - } - } - } - return answer; -} - -size_t roaring_bitmap_serialize(const roaring_bitmap_t *ra, char *buf) { - size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra); - uint64_t cardinality = roaring_bitmap_get_cardinality(ra); - uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t); - if (portablesize < sizeasarray) { - buf[0] = SERIALIZATION_CONTAINER; - return roaring_bitmap_portable_serialize(ra, buf + 1) + 1; - } else { - buf[0] = SERIALIZATION_ARRAY_UINT32; - memcpy(buf + 1, &cardinality, sizeof(uint32_t)); - roaring_bitmap_to_uint32_array( - ra, (uint32_t *)(buf + 1 + sizeof(uint32_t))); - return 1 + (size_t)sizeasarray; - } +static void insert_flipped_container(roaring_array_t *ans_arr, +const roaring_array_t *x1_arr, uint16_t hb, +uint16_t lb_start, uint16_t lb_end) { +const int i = ra_get_index(x1_arr, hb); +const int j = ra_get_index(ans_arr, hb); +uint8_t ctype_in, ctype_out; +container_t *flipped_container = NULL; +if (i >= 0) { +container_t *container_to_flip = +ra_get_container_at_index(x1_arr, i, &ctype_in); +flipped_container = +container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start, +(uint32_t)(lb_end + 1), &ctype_out); + +if (container_get_cardinality(flipped_container, ctype_out)) +ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, +ctype_out); +else { +container_free(flipped_container, ctype_out); +} +} else { +flipped_container = container_range_of_ones( +(uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); +ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, +ctype_out); +} } -size_t 
roaring_bitmap_size_in_bytes(const roaring_bitmap_t *ra) { - size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra); - uint64_t sizeasarray = roaring_bitmap_get_cardinality(ra) * sizeof(uint32_t) + - sizeof(uint32_t); - return portablesize < sizeasarray ? portablesize + 1 : (size_t)sizeasarray + 1; +static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb, +uint16_t lb_start, uint16_t lb_end) { +const int i = ra_get_index(x1_arr, hb); +uint8_t ctype_in, ctype_out; +container_t *flipped_container = NULL; +if (i >= 0) { +container_t *container_to_flip = +ra_get_container_at_index(x1_arr, i, &ctype_in); +flipped_container = container_inot_range( +container_to_flip, ctype_in, (uint32_t)lb_start, +(uint32_t)(lb_end + 1), &ctype_out); +// if a new container was created, the old one was already freed +if (container_get_cardinality(flipped_container, ctype_out)) { +ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); +} else { +container_free(flipped_container, ctype_out); +ra_remove_at_index(x1_arr, i); +} + +} else { +flipped_container = container_range_of_ones( +(uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); +ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, +ctype_out); +} } -size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra) { - return ra_portable_size_in_bytes(&ra->high_low_container); +static void insert_fully_flipped_container(roaring_array_t *ans_arr, +const roaring_array_t *x1_arr, +uint16_t hb) { +const int i = ra_get_index(x1_arr, hb); +const int j = ra_get_index(ans_arr, hb); +uint8_t ctype_in, ctype_out; +container_t *flipped_container = NULL; +if (i >= 0) { +container_t *container_to_flip = +ra_get_container_at_index(x1_arr, i, &ctype_in); +flipped_container = +container_not(container_to_flip, ctype_in, &ctype_out); +if (container_get_cardinality(flipped_container, ctype_out)) +ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, +ctype_out); +else { 
+container_free(flipped_container, ctype_out); +} +} else { +flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); +ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, +ctype_out); +} } +static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) { +const int i = ra_get_index(x1_arr, hb); +uint8_t ctype_in, ctype_out; +container_t *flipped_container = NULL; +if (i >= 0) { +container_t *container_to_flip = +ra_get_container_at_index(x1_arr, i, &ctype_in); +flipped_container = +container_inot(container_to_flip, ctype_in, &ctype_out); -roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) { - roaring_bitmap_t *ans = - (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); - if (ans == NULL) { - return NULL; - } - size_t bytesread; - bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread); - if(is_ok) assert(bytesread <= maxbytes); - roaring_bitmap_set_copy_on_write(ans, false); - if (!is_ok) { - free(ans); - return NULL; - } - return ans; +if (container_get_cardinality(flipped_container, ctype_out)) { +ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); +} else { +container_free(flipped_container, ctype_out); +ra_remove_at_index(x1_arr, i); } -roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) { - return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX); +} else { +flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); +ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, +ctype_out); +} } - -size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes) { - return ra_portable_deserialize_size(buf, maxbytes); +roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, +uint64_t range_start, +uint64_t range_end) { +if (range_start >= range_end) { +return roaring_bitmap_copy(x1); +} +if(range_end >= UINT64_C(0x100000000)) { +range_end = 
UINT64_C(0x100000000); } +roaring_bitmap_t *ans = roaring_bitmap_create(); +roaring_bitmap_set_copy_on_write(ans, is_cow(x1)); -size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra, - char *buf) { - return ra_portable_serialize(&ra->high_low_container, buf); -} +uint16_t hb_start = (uint16_t)(range_start >> 16); +const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF; +uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); +const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF; -roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) { - const char *bufaschar = (const char *)buf; - if (*(const unsigned char *)buf == SERIALIZATION_ARRAY_UINT32) { - /* This looks like a compressed set of uint32_t elements */ - uint32_t card; - memcpy(&card, bufaschar + 1, sizeof(uint32_t)); - const uint32_t *elems = - (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); - - return roaring_bitmap_of_ptr(card, elems); - } else if (bufaschar[0] == SERIALIZATION_CONTAINER) { - return roaring_bitmap_portable_deserialize(bufaschar + 1); - } else - return (NULL); -} - -bool roaring_iterate(const roaring_bitmap_t *ra, roaring_iterator iterator, - void *ptr) { - for (int i = 0; i < ra->high_low_container.size; ++i) - if (!container_iterate(ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - ((uint32_t)ra->high_low_container.keys[i]) << 16, - iterator, ptr)) { - return false; - } - return true; -} - -bool roaring_iterate64(const roaring_bitmap_t *ra, roaring_iterator64 iterator, - uint64_t high_bits, void *ptr) { - for (int i = 0; i < ra->high_low_container.size; ++i) - if (!container_iterate64( - ra->high_low_container.containers[i], - ra->high_low_container.typecodes[i], - ((uint32_t)ra->high_low_container.keys[i]) << 16, iterator, - high_bits, ptr)) { - return false; - } - return true; +ra_append_copies_until(&ans->high_low_container, &x1->high_low_container, +hb_start, is_cow(x1)); +if (hb_start == hb_end) { 
+insert_flipped_container(&ans->high_low_container, +&x1->high_low_container, hb_start, lb_start, +lb_end); +} else { +// start and end containers are distinct +if (lb_start > 0) { +// handle first (partial) container +insert_flipped_container(&ans->high_low_container, +&x1->high_low_container, hb_start, +lb_start, 0xFFFF); +++hb_start; // for the full containers. Can't wrap. } -/**** -* begin roaring_uint32_iterator_t -*****/ +if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block -// Partially initializes the roaring iterator when it begins looking at -// a new container. -static bool iter_new_container_partial_init(roaring_uint32_iterator_t *newit) { - newit->in_container_index = 0; - newit->run_index = 0; - newit->current_value = 0; - if (newit->container_index >= newit->parent->high_low_container.size || - newit->container_index < 0) { - newit->current_value = UINT32_MAX; - return (newit->has_value = false); - } - // assume not empty - newit->has_value = true; - // we precompute container, typecode and highbits so that successive - // iterators do not have to grab them from odd memory locations - // and have to worry about the (easily predicted) container_unwrap_shared - // call. 
- newit->container = - newit->parent->high_low_container.containers[newit->container_index]; - newit->typecode = - newit->parent->high_low_container.typecodes[newit->container_index]; - newit->highbits = - ((uint32_t) - newit->parent->high_low_container.keys[newit->container_index]) - << 16; - newit->container = - container_unwrap_shared(newit->container, &(newit->typecode)); - return newit->has_value; +for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { +insert_fully_flipped_container(&ans->high_low_container, +&x1->high_low_container, hb); } -static bool loadfirstvalue(roaring_uint32_iterator_t *newit) { - if (!iter_new_container_partial_init(newit)) - return newit->has_value; - - uint32_t wordindex; - uint64_t word; // used for bitsets - switch (newit->typecode) { - case BITSET_CONTAINER_TYPE_CODE: - wordindex = 0; - while ((word = ((const bitset_container_t *)(newit->container)) - ->array[wordindex]) == 0) - wordindex++; // advance - // here "word" is non-zero - newit->in_container_index = wordindex * 64 + __builtin_ctzll(word); - newit->current_value = newit->highbits | newit->in_container_index; - break; - case ARRAY_CONTAINER_TYPE_CODE: - newit->current_value = - newit->highbits | - ((const array_container_t *)(newit->container))->array[0]; - break; - case RUN_CONTAINER_TYPE_CODE: - newit->current_value = - newit->highbits | - (((const run_container_t *)(newit->container))->runs[0].value); - break; - default: - // if this ever happens, bug! 
- assert(false); - } // switch (typecode) - return true; +// handle a partial final container +if (lb_end != 0xFFFF) { +insert_flipped_container(&ans->high_low_container, +&x1->high_low_container, hb_end + 1, 0, +lb_end); +++hb_end; } - -static bool loadlastvalue(roaring_uint32_iterator_t* newit) { - if (!iter_new_container_partial_init(newit)) - return newit->has_value; - - switch(newit->typecode) { - case BITSET_CONTAINER_TYPE_CODE: { - uint32_t wordindex = BITSET_CONTAINER_SIZE_IN_WORDS - 1; - uint64_t word; - const bitset_container_t* bitset_container = (const bitset_container_t*)newit->container; - while ((word = bitset_container->array[wordindex]) == 0) - --wordindex; - - int num_leading_zeros = __builtin_clzll(word); - newit->in_container_index = (wordindex * 64) + (63 - num_leading_zeros); - newit->current_value = newit->highbits | newit->in_container_index; - break; - } - case ARRAY_CONTAINER_TYPE_CODE: { - const array_container_t* array_container = (const array_container_t*)newit->container; - newit->in_container_index = array_container->cardinality - 1; - newit->current_value = newit->highbits | array_container->array[newit->in_container_index]; - break; - } - case RUN_CONTAINER_TYPE_CODE: { - const run_container_t* run_container = (const run_container_t*)newit->container; - newit->run_index = run_container->n_runs - 1; - const rle16_t* last_run = &run_container->runs[newit->run_index]; - newit->current_value = newit->highbits | (last_run->value + last_run->length); - break; - } - default: - // if this ever happens, bug! 
- assert(false); - } - return true; } - -// prerequesite: the value should be in range of the container -static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) { - // Don't have to check return value because of prerequisite - iter_new_container_partial_init(newit); - uint16_t lb = val & 0xFFFF; - - switch (newit->typecode) { - case BITSET_CONTAINER_TYPE_CODE: - newit->in_container_index = bitset_container_index_equalorlarger((const bitset_container_t *)(newit->container), lb); - newit->current_value = newit->highbits | newit->in_container_index; - break; - case ARRAY_CONTAINER_TYPE_CODE: - newit->in_container_index = array_container_index_equalorlarger((const array_container_t *)(newit->container), lb); - newit->current_value = - newit->highbits | - ((const array_container_t *)(newit->container))->array[newit->in_container_index]; - break; - case RUN_CONTAINER_TYPE_CODE: - newit->run_index = run_container_index_equalorlarger((const run_container_t *)(newit->container), lb); - if(((const run_container_t *)(newit->container))->runs[newit->run_index].value <= lb) { - newit->current_value = val; - } else { - newit->current_value = - newit->highbits | - (((const run_container_t *)(newit->container))->runs[newit->run_index].value); - } - break; - default: - // if this ever happens, bug! 
- assert(false); - } // switch (typecode) - return true; +ra_append_copies_after(&ans->high_low_container, &x1->high_low_container, +hb_end, is_cow(x1)); +return ans; } -void roaring_init_iterator(const roaring_bitmap_t *ra, - roaring_uint32_iterator_t *newit) { - newit->parent = ra; - newit->container_index = 0; - newit->has_value = loadfirstvalue(newit); +void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, +uint64_t range_end) { +if (range_start >= range_end) { +return; // empty range } - -void roaring_init_iterator_last(const roaring_bitmap_t *ra, - roaring_uint32_iterator_t *newit) { - newit->parent = ra; - newit->container_index = newit->parent->high_low_container.size - 1; - newit->has_value = loadlastvalue(newit); +if(range_end >= UINT64_C(0x100000000)) { +range_end = UINT64_C(0x100000000); } -roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *ra) { - roaring_uint32_iterator_t *newit = - (roaring_uint32_iterator_t *)malloc(sizeof(roaring_uint32_iterator_t)); - if (newit == NULL) return NULL; - roaring_init_iterator(ra, newit); - return newit; +uint16_t hb_start = (uint16_t)(range_start >> 16); +const uint16_t lb_start = (uint16_t)range_start; +uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); +const uint16_t lb_end = (uint16_t)(range_end - 1); + +if (hb_start == hb_end) { +inplace_flip_container(&x1->high_low_container, hb_start, lb_start, +lb_end); +} else { +// start and end containers are distinct +if (lb_start > 0) { +// handle first (partial) container +inplace_flip_container(&x1->high_low_container, hb_start, lb_start, +0xFFFF); +++hb_start; // for the full containers. Can't wrap. 
} -roaring_uint32_iterator_t *roaring_copy_uint32_iterator( - const roaring_uint32_iterator_t *it) { - roaring_uint32_iterator_t *newit = - (roaring_uint32_iterator_t *)malloc(sizeof(roaring_uint32_iterator_t)); - memcpy(newit, it, sizeof(roaring_uint32_iterator_t)); - return newit; +if (lb_end != 0xFFFF) --hb_end; + +for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { +inplace_fully_flip_container(&x1->high_low_container, hb); +} +// handle a partial final container +if (lb_end != 0xFFFF) { +inplace_flip_container(&x1->high_low_container, hb_end + 1, 0, +lb_end); +++hb_end; +} +} } -bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) { - uint16_t hb = val >> 16; - const int i = ra_get_index(& it->parent->high_low_container, hb); - if (i >= 0) { - uint32_t lowvalue = container_maximum(it->parent->high_low_container.containers[i], it->parent->high_low_container.typecodes[i]); - uint16_t lb = val & 0xFFFF; - if(lowvalue < lb ) { - it->container_index = i+1; // will have to load first value of next container - } else {// the value is necessarily within the range of the container - it->container_index = i; - it->has_value = loadfirstvalue_largeorequal(it, val); - return it->has_value; - } - } else { - // there is no matching, so we are going for the next container - it->container_index = -i-1; - } - it->has_value = loadfirstvalue(it); - return it->has_value; +static void offset_append_with_merge(roaring_array_t *ra, int k, container_t *c, uint8_t t) { +int size = ra_get_size(ra); +if (size == 0 || ra_get_key_at_index(ra, size-1) != k) { +// No merge. 
+ra_append(ra, k, c, t); +return; } +uint8_t last_t, new_t; +container_t *last_c, *new_c; -bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) { - if (it->container_index >= it->parent->high_low_container.size) { - return (it->has_value = false); - } - if (it->container_index < 0) { - it->container_index = 0; - return (it->has_value = loadfirstvalue(it)); - } +// NOTE: we don't need to unwrap here, since we added last_c ourselves +// we have the certainty it's not a shared container. +// The same applies to c, as it's the result of calling container_offset. +last_c = ra_get_container_at_index(ra, size-1, &last_t); +new_c = container_ior(last_c, last_t, c, t, &new_t); - uint32_t wordindex; // used for bitsets - uint64_t word; // used for bitsets - switch (it->typecode) { - case BITSET_CONTAINER_TYPE_CODE: - it->in_container_index++; - wordindex = it->in_container_index / 64; - if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break; - word = ((const bitset_container_t *)(it->container)) - ->array[wordindex] & - (UINT64_MAX << (it->in_container_index % 64)); - // next part could be optimized/simplified - while ((word == 0) && - (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) { - wordindex++; - word = ((const bitset_container_t *)(it->container)) - ->array[wordindex]; - } - if (word != 0) { - it->in_container_index = wordindex * 64 + __builtin_ctzll(word); - it->current_value = it->highbits | it->in_container_index; - return (it->has_value = true); - } - break; - case ARRAY_CONTAINER_TYPE_CODE: - it->in_container_index++; - if (it->in_container_index < - ((const array_container_t *)(it->container))->cardinality) { - it->current_value = it->highbits | - ((const array_container_t *)(it->container)) - ->array[it->in_container_index]; - return (it->has_value = true); - } - break; - case RUN_CONTAINER_TYPE_CODE: { - if(it->current_value == UINT32_MAX) { - return (it->has_value = false); // without this, we risk an overflow to zero - } - - const 
run_container_t* run_container = (const run_container_t*)it->container; - if (++it->current_value <= (it->highbits | (run_container->runs[it->run_index].value + - run_container->runs[it->run_index].length))) { - return (it->has_value = true); - } - - if (++it->run_index < run_container->n_runs) { - // Assume the run has a value - it->current_value = it->highbits | run_container->runs[it->run_index].value; - return (it->has_value = true); - } - break; - } - default: - // if this ever happens, bug! - assert(false); - } // switch (typecode) - // moving to next container - it->container_index++; - return (it->has_value = loadfirstvalue(it)); +ra_set_container_at_index(ra, size-1, new_c, new_t); + +// Comparison of pointers of different origin is UB (or so claim some compiler +// makers), so we compare their bit representation only. +if ((uintptr_t)last_c != (uintptr_t)new_c) { +container_free(last_c, last_t); +} +container_free(c, t); } -bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it) { - if (it->container_index < 0) { - return (it->has_value = false); - } - if (it->container_index >= it->parent->high_low_container.size) { - it->container_index = it->parent->high_low_container.size - 1; - return (it->has_value = loadlastvalue(it)); - } +// roaring_bitmap_add_offset adds the value 'offset' to each and every value in +// a bitmap, generating a new bitmap in the process. If offset + element is +// outside of the range [0,2^32), that the element will be dropped. +// We need "offset" to be 64 bits because we want to support values +// between -0xFFFFFFFF up to +0xFFFFFFFF. 
+roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm, +int64_t offset) { +roaring_bitmap_t *answer; +roaring_array_t *ans_ra; +int64_t container_offset; +uint16_t in_offset; - switch (it->typecode) { - case BITSET_CONTAINER_TYPE_CODE: { - if (--it->in_container_index < 0) - break; - - const bitset_container_t* bitset_container = (const bitset_container_t*)it->container; - int32_t wordindex = it->in_container_index / 64; - uint64_t word = bitset_container->array[wordindex] & (UINT64_MAX >> (63 - (it->in_container_index % 64))); - - while (word == 0 && --wordindex >= 0) { - word = bitset_container->array[wordindex]; - } - if (word == 0) - break; - - int num_leading_zeros = __builtin_clzll(word); - it->in_container_index = (wordindex * 64) + (63 - num_leading_zeros); - it->current_value = it->highbits | it->in_container_index; - return (it->has_value = true); - } - case ARRAY_CONTAINER_TYPE_CODE: { - if (--it->in_container_index < 0) - break; - - const array_container_t* array_container = (const array_container_t*)it->container; - it->current_value = it->highbits | array_container->array[it->in_container_index]; - return (it->has_value = true); - } - case RUN_CONTAINER_TYPE_CODE: { - if(it->current_value == 0) - return (it->has_value = false); - - const run_container_t* run_container = (const run_container_t*)it->container; - if (--it->current_value >= (it->highbits | run_container->runs[it->run_index].value)) { - return (it->has_value = true); - } - - if (--it->run_index < 0) - break; - - it->current_value = it->highbits | (run_container->runs[it->run_index].value + - run_container->runs[it->run_index].length); - return (it->has_value = true); - } - default: - // if this ever happens, bug! 
- assert(false); - } // switch (typecode) +const roaring_array_t *bm_ra = &bm->high_low_container; +int length = bm_ra->size; - // moving to previous container - it->container_index--; - return (it->has_value = loadlastvalue(it)); +if (offset == 0) { +return roaring_bitmap_copy(bm); } -uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) { - uint32_t ret = 0; - uint32_t num_values; - uint32_t wordindex; // used for bitsets - uint64_t word; // used for bitsets - const array_container_t* acont; //TODO remove - const run_container_t* rcont; //TODO remove - const bitset_container_t* bcont; //TODO remove - - while (it->has_value && ret < count) { - switch (it->typecode) { - case BITSET_CONTAINER_TYPE_CODE: - bcont = (const bitset_container_t*)(it->container); - wordindex = it->in_container_index / 64; - word = bcont->array[wordindex] & (UINT64_MAX << (it->in_container_index % 64)); - do { - while (word != 0 && ret < count) { - buf[0] = it->highbits | (wordindex * 64 + __builtin_ctzll(word)); - word = word & (word - 1); - buf++; - ret++; - } - while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) { - wordindex++; - word = bcont->array[wordindex]; - } - } while (word != 0 && ret < count); - it->has_value = (word != 0); - if (it->has_value) { - it->in_container_index = wordindex * 64 + __builtin_ctzll(word); - it->current_value = it->highbits | it->in_container_index; - } - break; - case ARRAY_CONTAINER_TYPE_CODE: - acont = (const array_container_t *)(it->container); - num_values = minimum_uint32(acont->cardinality - it->in_container_index, count - ret); - for (uint32_t i = 0; i < num_values; i++) { - buf[i] = it->highbits | acont->array[it->in_container_index + i]; - } - buf += num_values; - ret += num_values; - it->in_container_index += num_values; - it->has_value = (it->in_container_index < acont->cardinality); - if (it->has_value) { - it->current_value = it->highbits | acont->array[it->in_container_index]; - } 
- break; - case RUN_CONTAINER_TYPE_CODE: - rcont = (const run_container_t*)(it->container); - //"in_run_index" name is misleading, read it as "max_value_in_current_run" - do { - uint32_t largest_run_value = it->highbits | (rcont->runs[it->run_index].value + rcont->runs[it->run_index].length); - num_values = minimum_uint32(largest_run_value - it->current_value + 1, count - ret); - for (uint32_t i = 0; i < num_values; i++) { - buf[i] = it->current_value + i; - } - it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0 - buf += num_values; - ret += num_values; - - if (it->current_value > largest_run_value || it->current_value == 0) { - it->run_index++; - if (it->run_index < rcont->n_runs) { - it->current_value = it->highbits | rcont->runs[it->run_index].value; - } else { - it->has_value = false; - } - } - } while ((ret < count) && it->has_value); - break; - default: - assert(false); - } - if (it->has_value) { - assert(ret == count); - return ret; - } - it->container_index++; - it->has_value = loadfirstvalue(it); - } - return ret; -} +container_offset = offset >> 16; +in_offset = (uint16_t)(offset - container_offset * (1 << 16)); +answer = roaring_bitmap_create(); +roaring_bitmap_set_copy_on_write(answer, is_cow(bm)); +ans_ra = &answer->high_low_container; -void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { free(it); } +if (in_offset == 0) { +ans_ra = &answer->high_low_container; -/**** -* end of roaring_uint32_iterator_t -*****/ +for (int i = 0, j = 0; i < length; ++i) { +int64_t key = ra_get_key_at_index(bm_ra, i); +key += container_offset; -bool roaring_bitmap_equals(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2) { - if (ra1->high_low_container.size != ra2->high_low_container.size) { - return false; - } - for (int i = 0; i < ra1->high_low_container.size; ++i) { - if (ra1->high_low_container.keys[i] != - ra2->high_low_container.keys[i]) { - return false; - } - } - for (int i = 0; i < ra1->high_low_container.size; 
++i) { - bool areequal = container_equals(ra1->high_low_container.containers[i], - ra1->high_low_container.typecodes[i], - ra2->high_low_container.containers[i], - ra2->high_low_container.typecodes[i]); - if (!areequal) { - return false; - } - } - return true; -} - -bool roaring_bitmap_is_subset(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2) { - const int length1 = ra1->high_low_container.size, - length2 = ra2->high_low_container.size; - - int pos1 = 0, pos2 = 0; - - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(&ra1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(&ra2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t container_type_1, container_type_2; - void *c1 = ra_get_container_at_index(&ra1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&ra2->high_low_container, pos2, - &container_type_2); - bool subset = - container_is_subset(c1, container_type_1, c2, container_type_2); - if (!subset) return false; - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - return false; - } else { // s1 > s2 - pos2 = ra_advance_until(&ra2->high_low_container, s1, pos2); - } - } - if (pos1 == length1) - return true; - else - return false; +if (key < 0 || key >= (1 << 16)) { +continue; } -static void insert_flipped_container(roaring_array_t *ans_arr, - const roaring_array_t *x1_arr, uint16_t hb, - uint16_t lb_start, uint16_t lb_end) { - const int i = ra_get_index(x1_arr, hb); - const int j = ra_get_index(ans_arr, hb); - uint8_t ctype_in, ctype_out; - void *flipped_container = NULL; - if (i >= 0) { - void *container_to_flip = - ra_get_container_at_index(x1_arr, i, &ctype_in); - flipped_container = - container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start, - (uint32_t)(lb_end + 1), &ctype_out); - - if (container_get_cardinality(flipped_container, ctype_out)) - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - else { 
- container_free(flipped_container, ctype_out); - } - } else { - flipped_container = container_range_of_ones( - (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - } +ra_append_copy(ans_ra, bm_ra, i, false); +ans_ra->keys[j++] = key; } -static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb, - uint16_t lb_start, uint16_t lb_end) { - const int i = ra_get_index(x1_arr, hb); - uint8_t ctype_in, ctype_out; - void *flipped_container = NULL; - if (i >= 0) { - void *container_to_flip = - ra_get_container_at_index(x1_arr, i, &ctype_in); - flipped_container = container_inot_range( - container_to_flip, ctype_in, (uint32_t)lb_start, - (uint32_t)(lb_end + 1), &ctype_out); - // if a new container was created, the old one was already freed - if (container_get_cardinality(flipped_container, ctype_out)) { - ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); - } else { - container_free(flipped_container, ctype_out); - ra_remove_at_index(x1_arr, i); - } - - } else { - flipped_container = container_range_of_ones( - (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); - ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, - ctype_out); - } +return answer; } -static void insert_fully_flipped_container(roaring_array_t *ans_arr, - const roaring_array_t *x1_arr, - uint16_t hb) { - const int i = ra_get_index(x1_arr, hb); - const int j = ra_get_index(ans_arr, hb); - uint8_t ctype_in, ctype_out; - void *flipped_container = NULL; - if (i >= 0) { - void *container_to_flip = - ra_get_container_at_index(x1_arr, i, &ctype_in); - flipped_container = - container_not(container_to_flip, ctype_in, &ctype_out); - if (container_get_cardinality(flipped_container, ctype_out)) - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - else { - container_free(flipped_container, ctype_out); - } - } else { - flipped_container = 
container_range_of_ones(0U, 0x10000U, &ctype_out); - ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, - ctype_out); - } -} +uint8_t t; +const container_t *c; +container_t *lo, *hi, **lo_ptr, **hi_ptr; +int64_t k; -static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) { - const int i = ra_get_index(x1_arr, hb); - uint8_t ctype_in, ctype_out; - void *flipped_container = NULL; - if (i >= 0) { - void *container_to_flip = - ra_get_container_at_index(x1_arr, i, &ctype_in); - flipped_container = - container_inot(container_to_flip, ctype_in, &ctype_out); - - if (container_get_cardinality(flipped_container, ctype_out)) { - ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); - } else { - container_free(flipped_container, ctype_out); - ra_remove_at_index(x1_arr, i); - } - - } else { - flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); - ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, - ctype_out); - } +for (int i = 0; i < length; ++i) { +lo = hi = NULL; +lo_ptr = hi_ptr = NULL; + +k = ra_get_key_at_index(bm_ra, i)+container_offset; +if (k >= 0 && k < (1 << 16)) { +lo_ptr = &lo; +} +if (k+1 >= 0 && k+1 < (1 << 16)) { +hi_ptr = &hi; +} +if (lo_ptr == NULL && hi_ptr == NULL) { +continue; } -roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, - uint64_t range_start, - uint64_t range_end) { - if (range_start >= range_end) { - return roaring_bitmap_copy(x1); - } - if(range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); - } +c = ra_get_container_at_index(bm_ra, i, &t); +c = container_unwrap_shared(c, &t); - roaring_bitmap_t *ans = roaring_bitmap_create(); - roaring_bitmap_set_copy_on_write(ans, is_cow(x1)); - - uint16_t hb_start = (uint16_t)(range_start >> 16); - const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF; - uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); - const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF; - - 
ra_append_copies_until(&ans->high_low_container, &x1->high_low_container, - hb_start, is_cow(x1)); - if (hb_start == hb_end) { - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_start, lb_start, - lb_end); - } else { - // start and end containers are distinct - if (lb_start > 0) { - // handle first (partial) container - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_start, - lb_start, 0xFFFF); - ++hb_start; // for the full containers. Can't wrap. - } - - if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block - - for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { - insert_fully_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb); - } - - // handle a partial final container - if (lb_end != 0xFFFF) { - insert_flipped_container(&ans->high_low_container, - &x1->high_low_container, hb_end + 1, 0, - lb_end); - ++hb_end; - } - } - ra_append_copies_after(&ans->high_low_container, &x1->high_low_container, - hb_end, is_cow(x1)); - return ans; +container_add_offset(c, t, lo_ptr, hi_ptr, in_offset); +if (lo != NULL) { +offset_append_with_merge(ans_ra, k, lo, t); +} +if (hi != NULL) { +ra_append(ans_ra, k+1, hi, t); +} } -void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, - uint64_t range_end) { - if (range_start >= range_end) { - return; // empty range - } - if(range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); - } - - uint16_t hb_start = (uint16_t)(range_start >> 16); - const uint16_t lb_start = (uint16_t)range_start; - uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); - const uint16_t lb_end = (uint16_t)(range_end - 1); - - if (hb_start == hb_end) { - inplace_flip_container(&x1->high_low_container, hb_start, lb_start, - lb_end); - } else { - // start and end containers are distinct - if (lb_start > 0) { - // handle first (partial) container - inplace_flip_container(&x1->high_low_container, hb_start, lb_start, - 0xFFFF); 
- ++hb_start; // for the full containers. Can't wrap. - } - - if (lb_end != 0xFFFF) --hb_end; - - for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { - inplace_fully_flip_container(&x1->high_low_container, hb); - } - // handle a partial final container - if (lb_end != 0xFFFF) { - inplace_flip_container(&x1->high_low_container, hb_end + 1, 0, - lb_end); - ++hb_end; - } - } +return answer; } roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, - const bool bitsetconversion) { - uint8_t container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - roaring_bitmap_create_with_capacity(length1 + length2); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2)); - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c; - if (bitsetconversion && (get_container_type(c1, container_type_1) != - BITSET_CONTAINER_TYPE_CODE) && - (get_container_type(c2, container_type_2) != - BITSET_CONTAINER_TYPE_CODE)) { - void *newc1 = - container_mutable_unwrap_shared(c1, &container_type_1); - newc1 = container_to_bitset(newc1, container_type_1); - container_type_1 = BITSET_CONTAINER_TYPE_CODE; - c = container_lazy_ior(newc1, container_type_1, c2, - container_type_2, - &container_result_type); - if (c != newc1) { // should not happen - container_free(newc1, container_type_1); - } - } else { - c = container_lazy_or(c1, container_type_1, c2, - 
container_type_2, &container_result_type); - } - // since we assume that the initial containers are non-empty, - // the - // result here - // can only be non-empty - ra_append(&answer->high_low_container, s1, c, - container_result_type); - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = - get_copy_of_container(c1, &container_type_1, is_cow(x1)); - if (is_cow(x1)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - container_type_1); - } - ra_append(&answer->high_low_container, s1, c1, container_type_1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - ra_append(&answer->high_low_container, s2, c2, container_type_2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - is_cow(x2)); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); - } - return answer; +const roaring_bitmap_t *x2, +const bool bitsetconversion) { +uint8_t result_type = 0; +const int length1 = x1->high_low_container.size, +length2 = x2->high_low_container.size; +if (0 == length1) { +return roaring_bitmap_copy(x2); +} +if (0 == length2) { +return roaring_bitmap_copy(x1); +} +roaring_bitmap_t *answer = 
+roaring_bitmap_create_with_capacity(length1 + length2); +roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); +int pos1 = 0, pos2 = 0; +uint8_t type1, type2; +uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +while (true) { +if (s1 == s2) { +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +container_t *c; +if (bitsetconversion && +(get_container_type(c1, type1) != BITSET_CONTAINER_TYPE) && +(get_container_type(c2, type2) != BITSET_CONTAINER_TYPE) +){ +container_t *newc1 = +container_mutable_unwrap_shared(c1, &type1); +newc1 = container_to_bitset(newc1, type1); +type1 = BITSET_CONTAINER_TYPE; +c = container_lazy_ior(newc1, type1, c2, type2, +&result_type); +if (c != newc1) { // should not happen +container_free(newc1, type1); +} +} else { +c = container_lazy_or(c1, type1, c2, type2, &result_type); +} +// since we assume that the initial containers are non-empty, +// the +// result here +// can only be non-empty +ra_append(&answer->high_low_container, s1, c, result_type); +++pos1; +++pos2; +if (pos1 == length1) break; +if (pos2 == length2) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + +} else if (s1 < s2) { // s1 < s2 +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +c1 = get_copy_of_container(c1, &type1, is_cow(x1)); +if (is_cow(x1)) { +ra_set_container_at_index(&x1->high_low_container, pos1, c1, +type1); +} +ra_append(&answer->high_low_container, s1, c1, type1); +pos1++; +if (pos1 == length1) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + +} else { // s1 > s2 +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +c2 = get_copy_of_container(c2, &type2, is_cow(x2)); +if 
(is_cow(x2)) { +ra_set_container_at_index(&x2->high_low_container, pos2, c2, +type2); +} +ra_append(&answer->high_low_container, s2, c2, type2); +pos2++; +if (pos2 == length2) break; +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +} +} +if (pos1 == length1) { +ra_append_copy_range(&answer->high_low_container, +&x2->high_low_container, pos2, length2, +is_cow(x2)); +} else if (pos2 == length2) { +ra_append_copy_range(&answer->high_low_container, +&x1->high_low_container, pos1, length1, +is_cow(x1)); +} +return answer; } void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, - const bool bitsetconversion) { - uint8_t container_result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - if (!container_is_full(c1, container_type_1)) { - if ((bitsetconversion == false) || - (get_container_type(c1, container_type_1) == - BITSET_CONTAINER_TYPE_CODE)) { - c1 = get_writable_copy_if_shared(c1, &container_type_1); - } else { - // convert to bitset - void *oldc1 = c1; - uint8_t oldt1 = container_type_1; - c1 = container_mutable_unwrap_shared(c1, &container_type_1); - c1 = container_to_bitset(c1, container_type_1); - container_free(oldc1, oldt1); - container_type_1 = BITSET_CONTAINER_TYPE_CODE; - } - - void *c2 = ra_get_container_at_index(&x2->high_low_container, - pos2, &container_type_2); - void *c = container_lazy_ior(c1, container_type_1, c2, - container_type_2, - &container_result_type); - if (c != - c1) { // in this instance a new container was 
created, and - // we need to free the old one - container_free(c1, container_type_1); - } - - ra_set_container_at_index(&x1->high_low_container, pos1, c, - container_result_type); - } - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - // void *c2_clone = container_clone(c2, container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - container_type_2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, is_cow(x2)); - } +const roaring_bitmap_t *x2, +const bool bitsetconversion) { +uint8_t result_type = 0; +int length1 = x1->high_low_container.size; +const int length2 = x2->high_low_container.size; + +if (0 == length2) return; + +if (0 == length1) { +roaring_bitmap_overwrite(x1, x2); +return; +} +int pos1 = 0, pos2 = 0; +uint8_t type1, type2; +uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +while (true) { +if (s1 == s2) { +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +if (!container_is_full(c1, type1)) { +if ((bitsetconversion == false) || +(get_container_type(c1, type1) == BITSET_CONTAINER_TYPE) +){ +c1 = get_writable_copy_if_shared(c1, &type1); +} else { +// 
convert to bitset +container_t *old_c1 = c1; +uint8_t old_type1 = type1; +c1 = container_mutable_unwrap_shared(c1, &type1); +c1 = container_to_bitset(c1, type1); +container_free(old_c1, old_type1); +type1 = BITSET_CONTAINER_TYPE; +} + +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +container_t *c = container_lazy_ior(c1, type1, c2, type2, +&result_type); + +if (c != c1) { // in this instance a new container was created, +// and we need to free the old one +container_free(c1, type1); +} + +ra_set_container_at_index(&x1->high_low_container, pos1, c, +result_type); +} +++pos1; +++pos2; +if (pos1 == length1) break; +if (pos2 == length2) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + +} else if (s1 < s2) { // s1 < s2 +pos1++; +if (pos1 == length1) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + +} else { // s1 > s2 +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +// container_t *c2_clone = container_clone(c2, type2); +c2 = get_copy_of_container(c2, &type2, is_cow(x2)); +if (is_cow(x2)) { +ra_set_container_at_index(&x2->high_low_container, pos2, c2, +type2); +} +ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, +type2); +pos1++; +length1++; +pos2++; +if (pos2 == length2) break; +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +} +} +if (pos1 == length1) { +ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, +pos2, length2, is_cow(x2)); +} } roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - if (0 == length1) { - return roaring_bitmap_copy(x2); - } - if (0 == length2) { - return roaring_bitmap_copy(x1); - } - roaring_bitmap_t *answer = - 
roaring_bitmap_create_with_capacity(length1 + length2); - roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2)); - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = - container_lazy_xor(c1, container_type_1, c2, container_type_2, - &container_result_type); - - if (container_nonzero_cardinality(c, container_result_type)) { - ra_append(&answer->high_low_container, s1, c, - container_result_type); - } else { - container_free(c, container_result_type); - } - - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = - get_copy_of_container(c1, &container_type_1, is_cow(x1)); - if (is_cow(x1)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c1, - container_type_1); - } - ra_append(&answer->high_low_container, s1, c1, container_type_1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - ra_append(&answer->high_low_container, s2, c2, container_type_2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if 
(pos1 == length1) { - ra_append_copy_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2, - is_cow(x2)); - } else if (pos2 == length2) { - ra_append_copy_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1, - is_cow(x1)); - } - return answer; +const roaring_bitmap_t *x2) { +uint8_t result_type = 0; +const int length1 = x1->high_low_container.size, +length2 = x2->high_low_container.size; +if (0 == length1) { +return roaring_bitmap_copy(x2); +} +if (0 == length2) { +return roaring_bitmap_copy(x1); +} +roaring_bitmap_t *answer = +roaring_bitmap_create_with_capacity(length1 + length2); +roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2)); +int pos1 = 0, pos2 = 0; +uint8_t type1, type2; +uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +while (true) { +if (s1 == s2) { +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +container_t *c = container_lazy_xor( +c1, type1, c2, type2, &result_type); + +if (container_nonzero_cardinality(c, result_type)) { +ra_append(&answer->high_low_container, s1, c, result_type); +} else { +container_free(c, result_type); +} + +++pos1; +++pos2; +if (pos1 == length1) break; +if (pos2 == length2) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + +} else if (s1 < s2) { // s1 < s2 +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +c1 = get_copy_of_container(c1, &type1, is_cow(x1)); +if (is_cow(x1)) { +ra_set_container_at_index(&x1->high_low_container, pos1, c1, +type1); +} +ra_append(&answer->high_low_container, s1, c1, type1); +pos1++; +if (pos1 == length1) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + +} else { // s1 > s2 +container_t *c2 = 
ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +c2 = get_copy_of_container(c2, &type2, is_cow(x2)); +if (is_cow(x2)) { +ra_set_container_at_index(&x2->high_low_container, pos2, c2, +type2); +} +ra_append(&answer->high_low_container, s2, c2, type2); +pos2++; +if (pos2 == length2) break; +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +} +} +if (pos1 == length1) { +ra_append_copy_range(&answer->high_low_container, +&x2->high_low_container, pos2, length2, +is_cow(x2)); +} else if (pos2 == length2) { +ra_append_copy_range(&answer->high_low_container, +&x1->high_low_container, pos1, length1, +is_cow(x1)); +} +return answer; } void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - assert(x1 != x2); - uint8_t container_result_type = 0; - int length1 = x1->high_low_container.size; - const int length2 = x2->high_low_container.size; - - if (0 == length2) return; - - if (0 == length1) { - roaring_bitmap_overwrite(x1, x2); - return; - } - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - c1 = get_writable_copy_if_shared(c1, &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - void *c = - container_lazy_ixor(c1, container_type_1, c2, container_type_2, - &container_result_type); - if (container_nonzero_cardinality(c, container_result_type)) { - ra_set_container_at_index(&x1->high_low_container, pos1, c, - container_result_type); - ++pos1; - } else { - container_free(c, container_result_type); - ra_remove_at_index(&x1->high_low_container, pos1); - --length1; - } - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = 
ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - // void *c2_clone = container_clone(c2, container_type_2); - c2 = - get_copy_of_container(c2, &container_type_2, is_cow(x2)); - if (is_cow(x2)) { - ra_set_container_at_index(&x2->high_low_container, pos2, c2, - container_type_2); - } - ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, - container_type_2); - pos1++; - length1++; - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, - pos2, length2, is_cow(x2)); - } +const roaring_bitmap_t *x2) { +assert(x1 != x2); +uint8_t result_type = 0; +int length1 = x1->high_low_container.size; +const int length2 = x2->high_low_container.size; + +if (0 == length2) return; + +if (0 == length1) { +roaring_bitmap_overwrite(x1, x2); +return; +} +int pos1 = 0, pos2 = 0; +uint8_t type1, type2; +uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +while (true) { +if (s1 == s2) { +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); + +// We do the computation "in place" only when c1 is not a shared container. +// Rationale: using a shared container safely with in place computation would +// require making a copy and then doing the computation in place which is likely +// less efficient than avoiding in place entirely and always generating a new +// container. 
+ +container_t *c; +if (type1 == SHARED_CONTAINER_TYPE) { +c = container_lazy_xor(c1, type1, c2, type2, &result_type); +shared_container_free(CAST_shared(c1)); // release +} +else { +c = container_lazy_ixor(c1, type1, c2, type2, &result_type); +} + +if (container_nonzero_cardinality(c, result_type)) { +ra_set_container_at_index(&x1->high_low_container, pos1, c, +result_type); +++pos1; +} else { +container_free(c, result_type); +ra_remove_at_index(&x1->high_low_container, pos1); +--length1; +} +++pos2; +if (pos1 == length1) break; +if (pos2 == length2) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + +} else if (s1 < s2) { // s1 < s2 +pos1++; +if (pos1 == length1) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + +} else { // s1 > s2 +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +// container_t *c2_clone = container_clone(c2, type2); +c2 = get_copy_of_container(c2, &type2, is_cow(x2)); +if (is_cow(x2)) { +ra_set_container_at_index(&x2->high_low_container, pos2, c2, +type2); +} +ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, +type2); +pos1++; +length1++; +pos2++; +if (pos2 == length2) break; +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +} +} +if (pos1 == length1) { +ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, +pos2, length2, is_cow(x2)); +} +} + +void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r) { +roaring_array_t *ra = &r->high_low_container; + +for (int i = 0; i < ra->size; ++i) { +const uint8_t old_type = ra->typecodes[i]; +container_t *old_c = ra->containers[i]; +uint8_t new_type = old_type; +container_t *new_c = container_repair_after_lazy(old_c, &new_type); +ra->containers[i] = new_c; +ra->typecodes[i] = new_type; } - -void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *ra) { - for (int i = 0; i < ra->high_low_container.size; ++i) { - const uint8_t 
original_typecode = ra->high_low_container.typecodes[i]; - void *container = ra->high_low_container.containers[i]; - uint8_t new_typecode = original_typecode; - void *newcontainer = - container_repair_after_lazy(container, &new_typecode); - ra->high_low_container.containers[i] = newcontainer; - ra->high_low_container.typecodes[i] = new_typecode; - } } @@ -9983,23 +19287,51 @@ void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *ra) { * to x. */ uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) { - uint64_t size = 0; - uint32_t xhigh = x >> 16; - for (int i = 0; i < bm->high_low_container.size; i++) { - uint32_t key = bm->high_low_container.keys[i]; - if (xhigh > key) { - size += - container_get_cardinality(bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i]); - } else if (xhigh == key) { - return size + container_rank(bm->high_low_container.containers[i], - bm->high_low_container.typecodes[i], - x & 0xFFFF); - } else { - return size; - } - } - return size; +uint64_t size = 0; +uint32_t xhigh = x >> 16; +for (int i = 0; i < bm->high_low_container.size; i++) { +uint32_t key = bm->high_low_container.keys[i]; +if (xhigh > key) { +size += +container_get_cardinality(bm->high_low_container.containers[i], +bm->high_low_container.typecodes[i]); +} else if (xhigh == key) { +return size + container_rank(bm->high_low_container.containers[i], +bm->high_low_container.typecodes[i], +x & 0xFFFF); +} else { +return size; +} +} +return size; +} + +/** + * roaring_bitmap_get_index returns the index of x; if x does not exist, it returns -1. 
+ */ +int64_t roaring_bitmap_get_index(const roaring_bitmap_t *bm, uint32_t x) { +int64_t index = 0; +const uint16_t xhigh = x >> 16; +int32_t high_idx = ra_get_index(&bm->high_low_container, xhigh); +if (high_idx < 0) return -1; + +for (int i = 0; i < bm->high_low_container.size; i++) { +uint32_t key = bm->high_low_container.keys[i]; +if (xhigh > key) { +index += +container_get_cardinality(bm->high_low_container.containers[i], +bm->high_low_container.typecodes[i]); +} else if (xhigh == key) { +int32_t low_idx = container_get_index( +bm->high_low_container.containers[high_idx], +bm->high_low_container.typecodes[high_idx], x & 0xFFFF); +if (low_idx < 0) return -1; +return index + low_idx; +} else { +return -1; +} +} +return index; } /** @@ -10007,14 +19339,14 @@ uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) { * Returns UINT32_MAX if the set is empty. */ uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) { - if (bm->high_low_container.size > 0) { - void *container = bm->high_low_container.containers[0]; - uint8_t typecode = bm->high_low_container.typecodes[0]; - uint32_t key = bm->high_low_container.keys[0]; - uint32_t lowvalue = container_minimum(container, typecode); - return lowvalue | (key << 16); - } - return UINT32_MAX; +if (bm->high_low_container.size > 0) { +container_t *c = bm->high_low_container.containers[0]; +uint8_t type = bm->high_low_container.typecodes[0]; +uint32_t key = bm->high_low_container.keys[0]; +uint32_t lowvalue = container_minimum(c, type); +return lowvalue | (key << 16); +} +return UINT32_MAX; } /** @@ -10022,132 +19354,167 @@ uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) { * Returns 0 if the set is empty. 
*/ uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) { - if (bm->high_low_container.size > 0) { - void *container = - bm->high_low_container.containers[bm->high_low_container.size - 1]; - uint8_t typecode = - bm->high_low_container.typecodes[bm->high_low_container.size - 1]; - uint32_t key = - bm->high_low_container.keys[bm->high_low_container.size - 1]; - uint32_t lowvalue = container_maximum(container, typecode); - return lowvalue | (key << 16); - } - return 0; +if (bm->high_low_container.size > 0) { +container_t *container = +bm->high_low_container.containers[bm->high_low_container.size - 1]; +uint8_t typecode = +bm->high_low_container.typecodes[bm->high_low_container.size - 1]; +uint32_t key = +bm->high_low_container.keys[bm->high_low_container.size - 1]; +uint32_t lowvalue = container_maximum(container, typecode); +return lowvalue | (key << 16); +} +return 0; } bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank, - uint32_t *element) { - void *container; - uint8_t typecode; - uint16_t key; - uint32_t start_rank = 0; - int i = 0; - bool valid = false; - while (!valid && i < bm->high_low_container.size) { - container = bm->high_low_container.containers[i]; - typecode = bm->high_low_container.typecodes[i]; - valid = - container_select(container, typecode, &start_rank, rank, element); - i++; - } - - if (valid) { - key = bm->high_low_container.keys[i - 1]; - *element |= (key << 16); - return true; - } else - return false; +uint32_t *element) { +container_t *container; +uint8_t typecode; +uint16_t key; +uint32_t start_rank = 0; +int i = 0; +bool valid = false; +while (!valid && i < bm->high_low_container.size) { +container = bm->high_low_container.containers[i]; +typecode = bm->high_low_container.typecodes[i]; +valid = +container_select(container, typecode, &start_rank, rank, element); +i++; +} + +if (valid) { +key = bm->high_low_container.keys[i - 1]; +*element |= (((uint32_t)key) << 16); // w/o cast, key promotes signed +return true; +} 
else +return false; } bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint64_t answer = 0; - int pos1 = 0, pos2 = 0; - - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(& x1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(& x2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t container_type_1, container_type_2; - void *c1 = ra_get_container_at_index(& x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(& x2->high_low_container, pos2, - &container_type_2); - if( container_intersect(c1, container_type_1, c2, container_type_2) ) return true; - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(& x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(& x2->high_low_container, s1, pos2); - } - } - return answer; +const roaring_bitmap_t *x2) { +const int length1 = x1->high_low_container.size, +length2 = x2->high_low_container.size; +uint64_t answer = 0; +int pos1 = 0, pos2 = 0; + +while (pos1 < length1 && pos2 < length2) { +const uint16_t s1 = ra_get_key_at_index(& x1->high_low_container, pos1); +const uint16_t s2 = ra_get_key_at_index(& x2->high_low_container, pos2); + +if (s1 == s2) { +uint8_t type1, type2; +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +if (container_intersect(c1, type1, c2, type2)) +return true; +++pos1; +++pos2; +} else if (s1 < s2) { // s1 < s2 +pos1 = ra_advance_until(& x1->high_low_container, s2, pos1); +} else { // s1 > s2 +pos2 = ra_advance_until(& x2->high_low_container, s1, pos2); +} +} +return answer != 0; +} + +bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, +uint64_t x, uint64_t y) { +if (x >= y) { +// 
Empty range. +return false; +} +roaring_uint32_iterator_t it; +roaring_init_iterator(bm, &it); +if (!roaring_move_uint32_iterator_equalorlarger(&it, x)) { +// No values above x. +return false; +} +if (it.current_value >= y) { +// No values below y. +return false; +} +return true; } uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const int length1 = x1->high_low_container.size, - length2 = x2->high_low_container.size; - uint64_t answer = 0; - int pos1 = 0, pos2 = 0; - - while (pos1 < length1 && pos2 < length2) { - const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - if (s1 == s2) { - uint8_t container_type_1, container_type_2; - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - answer += container_and_cardinality(c1, container_type_1, c2, - container_type_2); - ++pos1; - ++pos2; - } else if (s1 < s2) { // s1 < s2 - pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); - } else { // s1 > s2 - pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); - } - } - return answer; +const roaring_bitmap_t *x2) { +const int length1 = x1->high_low_container.size, +length2 = x2->high_low_container.size; +uint64_t answer = 0; +int pos1 = 0, pos2 = 0; +while (pos1 < length1 && pos2 < length2) { +const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + +if (s1 == s2) { +uint8_t type1, type2; +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +answer += container_and_cardinality(c1, type1, c2, type2); +++pos1; +++pos2; +} else if (s1 < s2) { // s1 < s2 +pos1 = 
ra_advance_until(&x1->high_low_container, s2, pos1); +} else { // s1 > s2 +pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); +} +} +return answer; } double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return (double)inter / (double)(c1 + c2 - inter); +const roaring_bitmap_t *x2) { +const uint64_t c1 = roaring_bitmap_get_cardinality(x1); +const uint64_t c2 = roaring_bitmap_get_cardinality(x2); +const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); +return (double)inter / (double)(c1 + c2 - inter); } uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return c1 + c2 - inter; +const roaring_bitmap_t *x2) { +const uint64_t c1 = roaring_bitmap_get_cardinality(x1); +const uint64_t c2 = roaring_bitmap_get_cardinality(x2); +const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); +return c1 + c2 - inter; } uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - return c1 - inter; +const roaring_bitmap_t *x2) { +const uint64_t c1 = roaring_bitmap_get_cardinality(x1); +const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); +return c1 - inter; } uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2) { - const uint64_t c1 = roaring_bitmap_get_cardinality(x1); - const uint64_t c2 = roaring_bitmap_get_cardinality(x2); - const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); - 
return c1 + c2 - 2 * inter; +const roaring_bitmap_t *x2) { +const uint64_t c1 = roaring_bitmap_get_cardinality(x1); +const uint64_t c2 = roaring_bitmap_get_cardinality(x2); +const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); +return c1 + c2 - 2 * inter; +} + + +bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) { +const uint16_t hb = val >> 16; +/* + * the next function call involves a binary search and lots of branching. + */ +int32_t i = ra_get_index(&r->high_low_container, hb); +if (i < 0) return false; + +uint8_t typecode; +// next call ought to be cheap +container_t *container = +ra_get_container_at_index(&r->high_low_container, i, &typecode); +// rest might be a tad expensive, possibly involving another round of binary search +return container_contains(container, val & 0xFFFF, typecode); } @@ -10155,54 +19522,53 @@ uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, * Check whether a range of values from range_start (included) to range_end (excluded) is present */ bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) { - if(range_end >= UINT64_C(0x100000000)) { - range_end = UINT64_C(0x100000000); - } - if (range_start >= range_end) return true; // empty range are always contained! - if (range_end - range_start == 1) return roaring_bitmap_contains(r, (uint32_t)range_start); - uint16_t hb_rs = (uint16_t)(range_start >> 16); - uint16_t hb_re = (uint16_t)((range_end - 1) >> 16); - const int32_t span = hb_re - hb_rs; - const int32_t hlc_sz = ra_get_size(&r->high_low_container); - if (hlc_sz < span + 1) { - return false; - } - int32_t is = ra_get_index(&r->high_low_container, hb_rs); - int32_t ie = ra_get_index(&r->high_low_container, hb_re); - ie = (ie < 0 ? 
-ie - 1 : ie); - if ((is < 0) || ((ie - is) != span)) { - return false; - } - const uint32_t lb_rs = range_start & 0xFFFF; - const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1; - uint8_t typecode; - void *container = ra_get_container_at_index(&r->high_low_container, is, &typecode); - if (hb_rs == hb_re) { - return container_contains_range(container, lb_rs, lb_re, typecode); - } - if (!container_contains_range(container, lb_rs, 1 << 16, typecode)) { - return false; - } - assert(ie < hlc_sz); // would indicate an algorithmic bug - container = ra_get_container_at_index(&r->high_low_container, ie, &typecode); - if (!container_contains_range(container, 0, lb_re, typecode)) { - return false; - } - for (int32_t i = is + 1; i < ie; ++i) { - container = ra_get_container_at_index(&r->high_low_container, i, &typecode); - if (!container_is_full(container, typecode) ) { - return false; - } - } - return true; +if(range_end >= UINT64_C(0x100000000)) { +range_end = UINT64_C(0x100000000); +} +if (range_start >= range_end) return true; // empty range are always contained! 
+if (range_end - range_start == 1) return roaring_bitmap_contains(r, (uint32_t)range_start); +uint16_t hb_rs = (uint16_t)(range_start >> 16); +uint16_t hb_re = (uint16_t)((range_end - 1) >> 16); +const int32_t span = hb_re - hb_rs; +const int32_t hlc_sz = ra_get_size(&r->high_low_container); +if (hlc_sz < span + 1) { +return false; +} +int32_t is = ra_get_index(&r->high_low_container, hb_rs); +int32_t ie = ra_get_index(&r->high_low_container, hb_re); +if ((ie < 0) || (is < 0) || ((ie - is) != span) || ie >= hlc_sz) { +return false; +} +const uint32_t lb_rs = range_start & 0xFFFF; +const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1; +uint8_t type; +container_t *c = ra_get_container_at_index(&r->high_low_container, is, +&type); +if (hb_rs == hb_re) { +return container_contains_range(c, lb_rs, lb_re, type); +} +if (!container_contains_range(c, lb_rs, 1 << 16, type)) { +return false; +} +c = ra_get_container_at_index(&r->high_low_container, ie, &type); +if (!container_contains_range(c, 0, lb_re, type)) { +return false; +} +for (int32_t i = is + 1; i < ie; ++i) { +c = ra_get_container_at_index(&r->high_low_container, i, &type); +if (!container_is_full(c, type) ) { +return false; +} +} +return true; } -bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2) { - return (roaring_bitmap_get_cardinality(ra2) > - roaring_bitmap_get_cardinality(ra1) && - roaring_bitmap_is_subset(ra1, ra2)); +bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1, +const roaring_bitmap_t *r2) { +return (roaring_bitmap_get_cardinality(r2) > +roaring_bitmap_get_cardinality(r1) && +roaring_bitmap_is_subset(r1, r2)); } @@ -10236,247 +19602,455 @@ bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *ra1, */ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) { - const roaring_array_t *ra = &rb->high_low_container; - size_t num_bytes = 0; - for (int32_t i = 0; i < ra->size; i++) { - switch (ra->typecodes[i]) { - case 
BITSET_CONTAINER_TYPE_CODE: { - num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - break; - } - case RUN_CONTAINER_TYPE_CODE: { - const run_container_t *run = - (const run_container_t *) ra->containers[i]; - num_bytes += run->n_runs * sizeof(rle16_t); - break; - } - case ARRAY_CONTAINER_TYPE_CODE: { - const array_container_t *array = - (const array_container_t *) ra->containers[i]; - num_bytes += array->cardinality * sizeof(uint16_t); - break; - } - default: - __builtin_unreachable(); - } - } - num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes - num_bytes += 4; // header - return num_bytes; +const roaring_array_t *ra = &rb->high_low_container; +size_t num_bytes = 0; +for (int32_t i = 0; i < ra->size; i++) { +switch (ra->typecodes[i]) { +case BITSET_CONTAINER_TYPE: { +num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); +break; +} +case RUN_CONTAINER_TYPE: { +const run_container_t *rc = const_CAST_run(ra->containers[i]); +num_bytes += rc->n_runs * sizeof(rle16_t); +break; +} +case ARRAY_CONTAINER_TYPE: { +const array_container_t *ac = +const_CAST_array(ra->containers[i]); +num_bytes += ac->cardinality * sizeof(uint16_t); +break; +} +default: +roaring_unreachable; +} +} +num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes +num_bytes += 4; // header +return num_bytes; } inline static void *arena_alloc(char **arena, size_t num_bytes) { - char *res = *arena; - *arena += num_bytes; - return res; +char *res = *arena; +*arena += num_bytes; +return res; } void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) { - /* - * Note: we do not require user to supply spicificly aligned buffer. +/* + * Note: we do not require user to supply a specifically aligned buffer. * Thus we have to use memcpy() everywhere. 
*/ - const roaring_array_t *ra = &rb->high_low_container; - - size_t bitset_zone_size = 0; - size_t run_zone_size = 0; - size_t array_zone_size = 0; - for (int32_t i = 0; i < ra->size; i++) { - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE_CODE: { - bitset_zone_size += - BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - break; - } - case RUN_CONTAINER_TYPE_CODE: { - const run_container_t *run = - (const run_container_t *) ra->containers[i]; - run_zone_size += run->n_runs * sizeof(rle16_t); - break; - } - case ARRAY_CONTAINER_TYPE_CODE: { - const array_container_t *array = - (const array_container_t *) ra->containers[i]; - array_zone_size += array->cardinality * sizeof(uint16_t); - break; - } - default: - __builtin_unreachable(); - } - } - - uint64_t *bitset_zone = (uint64_t *)arena_alloc(&buf, bitset_zone_size); - rle16_t *run_zone = (rle16_t *)arena_alloc(&buf, run_zone_size); - uint16_t *array_zone = (uint16_t *)arena_alloc(&buf, array_zone_size); - uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size); - uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size); - uint8_t *typecode_zone = (uint8_t *)arena_alloc(&buf, ra->size); - uint32_t *header_zone = (uint32_t *)arena_alloc(&buf, 4); - - for (int32_t i = 0; i < ra->size; i++) { - uint16_t count; - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE_CODE: { - const bitset_container_t *bitset = - (const bitset_container_t *) ra->containers[i]; - memcpy(bitset_zone, bitset->array, - BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); - bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; - if (bitset->cardinality != BITSET_UNKNOWN_CARDINALITY) { - count = bitset->cardinality - 1; - } else { - count = bitset_container_compute_cardinality(bitset) - 1; - } - break; - } - case RUN_CONTAINER_TYPE_CODE: { - const run_container_t *run = - (const run_container_t *) ra->containers[i]; - size_t num_bytes = run->n_runs * sizeof(rle16_t); - memcpy(run_zone, run->runs, num_bytes); - run_zone += 
run->n_runs; - count = run->n_runs; - break; - } - case ARRAY_CONTAINER_TYPE_CODE: { - const array_container_t *array = - (const array_container_t *) ra->containers[i]; - size_t num_bytes = array->cardinality * sizeof(uint16_t); - memcpy(array_zone, array->array, num_bytes); - array_zone += array->cardinality; - count = array->cardinality - 1; - break; - } - default: - __builtin_unreachable(); - } - memcpy(&count_zone[i], &count, 2); - } - memcpy(key_zone, ra->keys, ra->size * sizeof(uint16_t)); - memcpy(typecode_zone, ra->typecodes, ra->size * sizeof(uint8_t)); - uint32_t header = ((uint32_t)ra->size << 15) | FROZEN_COOKIE; - memcpy(header_zone, &header, 4); +const roaring_array_t *ra = &rb->high_low_container; + +size_t bitset_zone_size = 0; +size_t run_zone_size = 0; +size_t array_zone_size = 0; +for (int32_t i = 0; i < ra->size; i++) { +switch (ra->typecodes[i]) { +case BITSET_CONTAINER_TYPE: { +bitset_zone_size += +BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); +break; +} +case RUN_CONTAINER_TYPE: { +const run_container_t *rc = const_CAST_run(ra->containers[i]); +run_zone_size += rc->n_runs * sizeof(rle16_t); +break; +} +case ARRAY_CONTAINER_TYPE: { +const array_container_t *ac = +const_CAST_array(ra->containers[i]); +array_zone_size += ac->cardinality * sizeof(uint16_t); +break; +} +default: +roaring_unreachable; +} +} + +uint64_t *bitset_zone = (uint64_t *)arena_alloc(&buf, bitset_zone_size); +rle16_t *run_zone = (rle16_t *)arena_alloc(&buf, run_zone_size); +uint16_t *array_zone = (uint16_t *)arena_alloc(&buf, array_zone_size); +uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size); +uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size); +uint8_t *typecode_zone = (uint8_t *)arena_alloc(&buf, ra->size); +uint32_t *header_zone = (uint32_t *)arena_alloc(&buf, 4); + +for (int32_t i = 0; i < ra->size; i++) { +uint16_t count; +switch (ra->typecodes[i]) { +case BITSET_CONTAINER_TYPE: { +const bitset_container_t *bc = 
+const_CAST_bitset(ra->containers[i]); +memcpy(bitset_zone, bc->words, +BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); +bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; +if (bc->cardinality != BITSET_UNKNOWN_CARDINALITY) { +count = bc->cardinality - 1; +} else { +count = bitset_container_compute_cardinality(bc) - 1; +} +break; +} +case RUN_CONTAINER_TYPE: { +const run_container_t *rc = const_CAST_run(ra->containers[i]); +size_t num_bytes = rc->n_runs * sizeof(rle16_t); +memcpy(run_zone, rc->runs, num_bytes); +run_zone += rc->n_runs; +count = rc->n_runs; +break; +} +case ARRAY_CONTAINER_TYPE: { +const array_container_t *ac = +const_CAST_array(ra->containers[i]); +size_t num_bytes = ac->cardinality * sizeof(uint16_t); +memcpy(array_zone, ac->array, num_bytes); +array_zone += ac->cardinality; +count = ac->cardinality - 1; +break; +} +default: +roaring_unreachable; +} +memcpy(&count_zone[i], &count, 2); +} +memcpy(key_zone, ra->keys, ra->size * sizeof(uint16_t)); +memcpy(typecode_zone, ra->typecodes, ra->size * sizeof(uint8_t)); +uint32_t header = ((uint32_t)ra->size << 15) | FROZEN_COOKIE; +memcpy(header_zone, &header, 4); } const roaring_bitmap_t * roaring_bitmap_frozen_view(const char *buf, size_t length) { - if ((uintptr_t)buf % 32 != 0) { - return NULL; - } - - // cookie and num_containers - if (length < 4) { - return NULL; - } - uint32_t header; - memcpy(&header, buf + length - 4, 4); // header may be misaligned - if ((header & 0x7FFF) != FROZEN_COOKIE) { - return NULL; - } - int32_t num_containers = (header >> 15); - - // typecodes, counts and keys - if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) { - return NULL; - } - uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5); - uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3); - uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1); - - // {bitset,array,run}_zone - int32_t num_bitset_containers = 0; - int32_t num_run_containers = 0; - int32_t 
num_array_containers = 0; - size_t bitset_zone_size = 0; - size_t run_zone_size = 0; - size_t array_zone_size = 0; - for (int32_t i = 0; i < num_containers; i++) { - switch (typecodes[i]) { - case BITSET_CONTAINER_TYPE_CODE: - num_bitset_containers++; - bitset_zone_size += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - break; - case RUN_CONTAINER_TYPE_CODE: - num_run_containers++; - run_zone_size += counts[i] * sizeof(rle16_t); - break; - case ARRAY_CONTAINER_TYPE_CODE: - num_array_containers++; - array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t); - break; - default: - return NULL; - } - } - if (length != bitset_zone_size + run_zone_size + array_zone_size + - 5 * num_containers + 4) { - return NULL; - } - uint64_t *bitset_zone = (uint64_t*) (buf); - rle16_t *run_zone = (rle16_t*) (buf + bitset_zone_size); - uint16_t *array_zone = (uint16_t*) (buf + bitset_zone_size + run_zone_size); - - size_t alloc_size = 0; - alloc_size += sizeof(roaring_bitmap_t); - alloc_size += num_containers * sizeof(void *); - alloc_size += num_bitset_containers * sizeof(bitset_container_t); - alloc_size += num_run_containers * sizeof(run_container_t); - alloc_size += num_array_containers * sizeof(array_container_t); - - char *arena = (char *)malloc(alloc_size); - if (arena == NULL) { - return NULL; - } - - roaring_bitmap_t *rb = (roaring_bitmap_t *) - arena_alloc(&arena, sizeof(roaring_bitmap_t)); - rb->high_low_container.flags = ROARING_FLAG_FROZEN; - rb->high_low_container.allocation_size = num_containers; - rb->high_low_container.size = num_containers; - rb->high_low_container.keys = (uint16_t *)keys; - rb->high_low_container.typecodes = (uint8_t *)typecodes; - rb->high_low_container.containers = - (void **)arena_alloc(&arena, sizeof(void*) * num_containers); - for (int32_t i = 0; i < num_containers; i++) { - switch (typecodes[i]) { - case BITSET_CONTAINER_TYPE_CODE: { - bitset_container_t *bitset = (bitset_container_t *) - arena_alloc(&arena, 
sizeof(bitset_container_t)); - bitset->array = bitset_zone; - bitset->cardinality = counts[i] + UINT32_C(1); - rb->high_low_container.containers[i] = bitset; - bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; - break; - } - case RUN_CONTAINER_TYPE_CODE: { - run_container_t *run = (run_container_t *) - arena_alloc(&arena, sizeof(run_container_t)); - run->capacity = counts[i]; - run->n_runs = counts[i]; - run->runs = run_zone; - rb->high_low_container.containers[i] = run; - run_zone += run->n_runs; - break; - } - case ARRAY_CONTAINER_TYPE_CODE: { - array_container_t *array = (array_container_t *) - arena_alloc(&arena, sizeof(array_container_t)); - array->capacity = counts[i] + UINT32_C(1); - array->cardinality = counts[i] + UINT32_C(1); - array->array = array_zone; - rb->high_low_container.containers[i] = array; - array_zone += counts[i] + UINT32_C(1); - break; - } - default: - free(arena); - return NULL; - } - } - - return rb; -} +if ((uintptr_t)buf % 32 != 0) { +return NULL; +} + +// cookie and num_containers +if (length < 4) { +return NULL; +} +uint32_t header; +memcpy(&header, buf + length - 4, 4); // header may be misaligned +if ((header & 0x7FFF) != FROZEN_COOKIE) { +return NULL; +} +int32_t num_containers = (header >> 15); + +// typecodes, counts and keys +if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) { +return NULL; +} +uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5); +uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3); +uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1); + +// {bitset,array,run}_zone +int32_t num_bitset_containers = 0; +int32_t num_run_containers = 0; +int32_t num_array_containers = 0; +size_t bitset_zone_size = 0; +size_t run_zone_size = 0; +size_t array_zone_size = 0; +for (int32_t i = 0; i < num_containers; i++) { +switch (typecodes[i]) { +case BITSET_CONTAINER_TYPE: +num_bitset_containers++; +bitset_zone_size += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); +break; 
+case RUN_CONTAINER_TYPE: +num_run_containers++; +run_zone_size += counts[i] * sizeof(rle16_t); +break; +case ARRAY_CONTAINER_TYPE: +num_array_containers++; +array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t); +break; +default: +return NULL; +} +} +if (length != bitset_zone_size + run_zone_size + array_zone_size + +5 * num_containers + 4) { +return NULL; +} +uint64_t *bitset_zone = (uint64_t*) (buf); +rle16_t *run_zone = (rle16_t*) (buf + bitset_zone_size); +uint16_t *array_zone = (uint16_t*) (buf + bitset_zone_size + run_zone_size); + +size_t alloc_size = 0; +alloc_size += sizeof(roaring_bitmap_t); +alloc_size += num_containers * sizeof(container_t*); +alloc_size += num_bitset_containers * sizeof(bitset_container_t); +alloc_size += num_run_containers * sizeof(run_container_t); +alloc_size += num_array_containers * sizeof(array_container_t); + +char *arena = (char *)roaring_malloc(alloc_size); +if (arena == NULL) { +return NULL; +} + +roaring_bitmap_t *rb = (roaring_bitmap_t *) +arena_alloc(&arena, sizeof(roaring_bitmap_t)); +rb->high_low_container.flags = ROARING_FLAG_FROZEN; +rb->high_low_container.allocation_size = num_containers; +rb->high_low_container.size = num_containers; +rb->high_low_container.keys = (uint16_t *)keys; +rb->high_low_container.typecodes = (uint8_t *)typecodes; +rb->high_low_container.containers = +(container_t **)arena_alloc(&arena, +sizeof(container_t*) * num_containers); +// Ensure offset of high_low_container.containers is known distance used in +// C++ wrapper. sizeof(roaring_bitmap_t) is used as it is the size of the +// only allocation that precedes high_low_container.containers. If this is +// changed (new allocation or changed order), this offset will also need to +// be changed in the C++ wrapper. 
+assert(rb == +(roaring_bitmap_t *)((char *)rb->high_low_container.containers - +sizeof(roaring_bitmap_t))); +for (int32_t i = 0; i < num_containers; i++) { +switch (typecodes[i]) { +case BITSET_CONTAINER_TYPE: { +bitset_container_t *bitset = (bitset_container_t *) +arena_alloc(&arena, sizeof(bitset_container_t)); +bitset->words = bitset_zone; +bitset->cardinality = counts[i] + UINT32_C(1); +rb->high_low_container.containers[i] = bitset; +bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; +break; +} +case RUN_CONTAINER_TYPE: { +run_container_t *run = (run_container_t *) +arena_alloc(&arena, sizeof(run_container_t)); +run->capacity = counts[i]; +run->n_runs = counts[i]; +run->runs = run_zone; +rb->high_low_container.containers[i] = run; +run_zone += run->n_runs; +break; +} +case ARRAY_CONTAINER_TYPE: { +array_container_t *array = (array_container_t *) +arena_alloc(&arena, sizeof(array_container_t)); +array->capacity = counts[i] + UINT32_C(1); +array->cardinality = counts[i] + UINT32_C(1); +array->array = array_zone; +rb->high_low_container.containers[i] = array; +array_zone += counts[i] + UINT32_C(1); +break; +} +default: +roaring_free(arena); +return NULL; +} +} + +return rb; +} + +ALLOW_UNALIGNED +roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) { +char *start_of_buf = (char *) buf; +uint32_t cookie; +int32_t num_containers; +uint16_t *descriptive_headers; +uint32_t *offset_headers = NULL; +const char *run_flag_bitset = NULL; +bool hasrun = false; + +// deserialize cookie +memcpy(&cookie, buf, sizeof(uint32_t)); +buf += sizeof(uint32_t); +if (cookie == SERIAL_COOKIE_NO_RUNCONTAINER) { +memcpy(&num_containers, buf, sizeof(int32_t)); +buf += sizeof(int32_t); +descriptive_headers = (uint16_t *) buf; +buf += num_containers * 2 * sizeof(uint16_t); +offset_headers = (uint32_t *) buf; +buf += num_containers * sizeof(uint32_t); +} else if ((cookie & 0xFFFF) == SERIAL_COOKIE) { +num_containers = (cookie >> 16) + 1; +hasrun = true; +int32_t 
run_flag_bitset_size = (num_containers + 7) / 8; +run_flag_bitset = buf; +buf += run_flag_bitset_size; +descriptive_headers = (uint16_t *) buf; +buf += num_containers * 2 * sizeof(uint16_t); +if(num_containers >= NO_OFFSET_THRESHOLD) { +offset_headers = (uint32_t *) buf; +buf += num_containers * sizeof(uint32_t); +} +} else { +return NULL; +} + +// calculate total size for allocation +int32_t num_bitset_containers = 0; +int32_t num_run_containers = 0; +int32_t num_array_containers = 0; + +for (int32_t i = 0; i < num_containers; i++) { +uint16_t tmp; +memcpy(&tmp, descriptive_headers + 2*i+1, sizeof(tmp)); +uint32_t cardinality = tmp + 1; +bool isbitmap = (cardinality > DEFAULT_MAX_SIZE); +bool isrun = false; +if(hasrun) { +if((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) { +isbitmap = false; +isrun = true; +} +} + +if (isbitmap) { +num_bitset_containers++; +} else if (isrun) { +num_run_containers++; +} else { +num_array_containers++; +} +} + +size_t alloc_size = 0; +alloc_size += sizeof(roaring_bitmap_t); +alloc_size += num_containers * sizeof(container_t*); +alloc_size += num_bitset_containers * sizeof(bitset_container_t); +alloc_size += num_run_containers * sizeof(run_container_t); +alloc_size += num_array_containers * sizeof(array_container_t); +alloc_size += num_containers * sizeof(uint16_t); // keys +alloc_size += num_containers * sizeof(uint8_t); // typecodes + +// allocate bitmap and construct containers +char *arena = (char *)roaring_malloc(alloc_size); +if (arena == NULL) { +return NULL; +} + +roaring_bitmap_t *rb = (roaring_bitmap_t *) +arena_alloc(&arena, sizeof(roaring_bitmap_t)); +rb->high_low_container.flags = ROARING_FLAG_FROZEN; +rb->high_low_container.allocation_size = num_containers; +rb->high_low_container.size = num_containers; +rb->high_low_container.containers = +(container_t **)arena_alloc(&arena, +sizeof(container_t*) * num_containers); + +uint16_t *keys = (uint16_t *)arena_alloc(&arena, num_containers * sizeof(uint16_t)); +uint8_t 
*typecodes = (uint8_t *)arena_alloc(&arena, num_containers * sizeof(uint8_t)); + +rb->high_low_container.keys = keys; +rb->high_low_container.typecodes = typecodes; + +for (int32_t i = 0; i < num_containers; i++) { +uint16_t tmp; +memcpy(&tmp, descriptive_headers + 2*i+1, sizeof(tmp)); +int32_t cardinality = tmp + 1; +bool isbitmap = (cardinality > DEFAULT_MAX_SIZE); +bool isrun = false; +if(hasrun) { +if((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) { +isbitmap = false; +isrun = true; +} +} + +keys[i] = descriptive_headers[2*i]; + +if (isbitmap) { +typecodes[i] = BITSET_CONTAINER_TYPE; +bitset_container_t *c = (bitset_container_t *)arena_alloc(&arena, sizeof(bitset_container_t)); +c->cardinality = cardinality; +if(offset_headers != NULL) { +c->words = (uint64_t *) (start_of_buf + offset_headers[i]); +} else { +c->words = (uint64_t *) buf; +buf += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); +} +rb->high_low_container.containers[i] = c; +} else if (isrun) { +typecodes[i] = RUN_CONTAINER_TYPE; +run_container_t *c = (run_container_t *)arena_alloc(&arena, sizeof(run_container_t)); +c->capacity = cardinality; +uint16_t n_runs; +if(offset_headers != NULL) { +memcpy(&n_runs, start_of_buf + offset_headers[i], sizeof(uint16_t)); +c->n_runs = n_runs; +c->runs = (rle16_t *) (start_of_buf + offset_headers[i] + sizeof(uint16_t)); +} else { +memcpy(&n_runs, buf, sizeof(uint16_t)); +c->n_runs = n_runs; +buf += sizeof(uint16_t); +c->runs = (rle16_t *) buf; +buf += c->n_runs * sizeof(rle16_t); +} +rb->high_low_container.containers[i] = c; +} else { +typecodes[i] = ARRAY_CONTAINER_TYPE; +array_container_t *c = (array_container_t *)arena_alloc(&arena, sizeof(array_container_t)); +c->cardinality = cardinality; +c->capacity = cardinality; +if(offset_headers != NULL) { +c->array = (uint16_t *) (start_of_buf + offset_headers[i]); +} else { +c->array = (uint16_t *) buf; +buf += cardinality * sizeof(uint16_t); +} +rb->high_low_container.containers[i] = c; +} +} + +return rb; +} + 
+bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t * bitset) { +uint32_t max_value = roaring_bitmap_maximum(r); +size_t new_array_size = (size_t)(((uint64_t)max_value + 63)/64); +bool resize_ok = bitset_resize(bitset, new_array_size, true); +if(!resize_ok) { return false; } +const roaring_array_t *ra = &r->high_low_container; +for (int i = 0; i < ra->size; ++i) { +uint64_t* words = bitset->array + (ra->keys[i]<<10); +uint8_t type = ra->typecodes[i]; +const container_t *c = ra->containers[i]; +if(type == SHARED_CONTAINER_TYPE) { +c = container_unwrap_shared(c, &type); +} +switch (type) { +case BITSET_CONTAINER_TYPE: +{ +size_t max_word_index = new_array_size - (ra->keys[i]<<10); +if(max_word_index > 1024) { max_word_index = 1024; } +const bitset_container_t *src = const_CAST_bitset(c); +memcpy(words, src->words, max_word_index * sizeof(uint64_t)); +} +break; +case ARRAY_CONTAINER_TYPE: +{ +const array_container_t *src = const_CAST_array(c); +bitset_set_list(words, src->array, src->cardinality); +} +break; +case RUN_CONTAINER_TYPE: +{ +const run_container_t *src = const_CAST_run(c); +for (int32_t rlepos = 0; rlepos < src->n_runs; ++rlepos) { +rle16_t rle = src->runs[rlepos]; +bitset_set_lenrange(words, rle.value, rle.length); +} +} +break; +default: +roaring_unreachable; +} +} +return true; +} + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { +#endif /* end file src/roaring.c */ /* begin file src/roaring_array.c */ #include @@ -10487,394 +20061,351 @@ roaring_bitmap_frozen_view(const char *buf, size_t length) { #include +#ifdef __cplusplus +extern "C" { namespace roaring { namespace internal { +#endif + // Convention: [0,ra->size) all elements are initialized // [ra->size, ra->allocation_size) is junk and contains nothing needing freeing extern inline int32_t ra_get_size(const roaring_array_t *ra); extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); -extern inline void *ra_get_container_at_index(const 
roaring_array_t *ra, - uint16_t i, uint8_t *typecode); + +extern inline container_t *ra_get_container_at_index( +const roaring_array_t *ra, uint16_t i, +uint8_t *typecode); + extern inline void ra_unshare_container_at_index(roaring_array_t *ra, - uint16_t i); -extern inline void ra_replace_key_and_container_at_index(roaring_array_t *ra, - int32_t i, - uint16_t key, void *c, - uint8_t typecode); -extern inline void ra_set_container_at_index(const roaring_array_t *ra, - int32_t i, void *c, - uint8_t typecode); +uint16_t i); -static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) { - // because we combine the allocations, it is not possible to use realloc - /*ra->keys = - (uint16_t *)realloc(ra->keys, sizeof(uint16_t) * new_capacity); -ra->containers = - (void **)realloc(ra->containers, sizeof(void *) * new_capacity); -ra->typecodes = - (uint8_t *)realloc(ra->typecodes, sizeof(uint8_t) * new_capacity); -if (!ra->keys || !ra->containers || !ra->typecodes) { - free(ra->keys); - free(ra->containers); - free(ra->typecodes); - return false; -}*/ +extern inline void ra_replace_key_and_container_at_index( +roaring_array_t *ra, int32_t i, uint16_t key, +container_t *c, uint8_t typecode); - if ( new_capacity == 0 ) { - free(ra->containers); - ra->containers = NULL; - ra->keys = NULL; - ra->typecodes = NULL; - ra->allocation_size = 0; - return true; - } - const size_t memoryneeded = - new_capacity * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); - void *bigalloc = malloc(memoryneeded); - if (!bigalloc) return false; - void *oldbigalloc = ra->containers; - void **newcontainers = (void **)bigalloc; - uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity); - uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity); - assert((char *)(newtypecodes + new_capacity) == - (char *)bigalloc + memoryneeded); - if(ra->size > 0) { - memcpy(newcontainers, ra->containers, sizeof(void *) * ra->size); - memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size); - 
memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size); - } - ra->containers = newcontainers; - ra->keys = newkeys; - ra->typecodes = newtypecodes; - ra->allocation_size = new_capacity; - free(oldbigalloc); - return true; +extern inline void ra_set_container_at_index( +const roaring_array_t *ra, int32_t i, +container_t *c, uint8_t typecode); + +static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) { +// +// Note: not implemented using C's realloc(), because the memory layout is +// Struct-of-Arrays vs. Array-of-Structs: +// https://github.com/RoaringBitmap/CRoaring/issues/256 + +if ( new_capacity == 0 ) { +roaring_free(ra->containers); +ra->containers = NULL; +ra->keys = NULL; +ra->typecodes = NULL; +ra->allocation_size = 0; +return true; +} +const size_t memoryneeded = new_capacity * ( +sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)); +void *bigalloc = roaring_malloc(memoryneeded); +if (!bigalloc) return false; +void *oldbigalloc = ra->containers; +container_t **newcontainers = (container_t **)bigalloc; +uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity); +uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity); +assert((char *)(newtypecodes + new_capacity) == +(char *)bigalloc + memoryneeded); +if(ra->size > 0) { +memcpy(newcontainers, ra->containers, sizeof(container_t *) * ra->size); +memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size); +memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size); +} +ra->containers = newcontainers; +ra->keys = newkeys; +ra->typecodes = newtypecodes; +ra->allocation_size = new_capacity; +roaring_free(oldbigalloc); +return true; } bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) { - if (!new_ra) return false; - ra_init(new_ra); - - if (cap > INT32_MAX) { return false; } - - if(cap > 0) { - void *bigalloc = - malloc(cap * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t))); - if( bigalloc == NULL ) return false; - new_ra->containers = (void 
**)bigalloc; - new_ra->keys = (uint16_t *)(new_ra->containers + cap); - new_ra->typecodes = (uint8_t *)(new_ra->keys + cap); - // Narrowing is safe because of above check - new_ra->allocation_size = (int32_t)cap; - } - return true; +if (!new_ra) return false; +ra_init(new_ra); + +// Containers hold 64Ki elements, so 64Ki containers is enough to hold `0x10000 * 0x10000` (all 2^32) elements +if (cap > 0x10000) { +cap = 0x10000; +} + +if(cap > 0) { +void *bigalloc = roaring_malloc(cap * +(sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t))); +if( bigalloc == NULL ) return false; +new_ra->containers = (container_t **)bigalloc; +new_ra->keys = (uint16_t *)(new_ra->containers + cap); +new_ra->typecodes = (uint8_t *)(new_ra->keys + cap); +// Narrowing is safe because of above check +new_ra->allocation_size = (int32_t)cap; +} +return true; } int ra_shrink_to_fit(roaring_array_t *ra) { - int savings = (ra->allocation_size - ra->size) * - (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); - if (!realloc_array(ra, ra->size)) { - return 0; - } - ra->allocation_size = ra->size; - return savings; +int savings = (ra->allocation_size - ra->size) * +(sizeof(uint16_t) + sizeof(container_t *) + sizeof(uint8_t)); +if (!realloc_array(ra, ra->size)) { +return 0; +} +ra->allocation_size = ra->size; +return savings; } void ra_init(roaring_array_t *new_ra) { - if (!new_ra) { return; } - new_ra->keys = NULL; - new_ra->containers = NULL; - new_ra->typecodes = NULL; - - new_ra->allocation_size = 0; - new_ra->size = 0; - new_ra->flags = 0; -} +if (!new_ra) { return; } +new_ra->keys = NULL; +new_ra->containers = NULL; +new_ra->typecodes = NULL; -bool ra_copy(const roaring_array_t *source, roaring_array_t *dest, - bool copy_on_write) { - if (!ra_init_with_capacity(dest, source->size)) return false; - dest->size = source->size; - dest->allocation_size = source->size; - if(dest->size > 0) { - memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); - } - // we go through the 
containers, turning them into shared containers... - if (copy_on_write) { - for (int32_t i = 0; i < dest->size; ++i) { - source->containers[i] = get_copy_of_container( - source->containers[i], &source->typecodes[i], copy_on_write); - } - // we do a shallow copy to the other bitmap - if(dest->size > 0) { - memcpy(dest->containers, source->containers, - dest->size * sizeof(void *)); - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - } - } else { - if(dest->size > 0) { - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - } - for (int32_t i = 0; i < dest->size; i++) { - dest->containers[i] = - container_clone(source->containers[i], source->typecodes[i]); - if (dest->containers[i] == NULL) { - for (int32_t j = 0; j < i; j++) { - container_free(dest->containers[j], dest->typecodes[j]); - } - ra_clear_without_containers(dest); - return false; - } - } - } - return true; +new_ra->allocation_size = 0; +new_ra->size = 0; +new_ra->flags = 0; } bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, - bool copy_on_write) { - ra_clear_containers(dest); // we are going to overwrite them - if (dest->allocation_size < source->size) { - if (!realloc_array(dest, source->size)) { - return false; - } - } - dest->size = source->size; - memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); - // we go through the containers, turning them into shared containers... 
- if (copy_on_write) { - for (int32_t i = 0; i < dest->size; ++i) { - source->containers[i] = get_copy_of_container( - source->containers[i], &source->typecodes[i], copy_on_write); - } - // we do a shallow copy to the other bitmap - memcpy(dest->containers, source->containers, - dest->size * sizeof(void *)); - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - } else { - memcpy(dest->typecodes, source->typecodes, - dest->size * sizeof(uint8_t)); - for (int32_t i = 0; i < dest->size; i++) { - dest->containers[i] = - container_clone(source->containers[i], source->typecodes[i]); - if (dest->containers[i] == NULL) { - for (int32_t j = 0; j < i; j++) { - container_free(dest->containers[j], dest->typecodes[j]); - } - ra_clear_without_containers(dest); - return false; - } - } - } - return true; +bool copy_on_write) { +ra_clear_containers(dest); // we are going to overwrite them +if (source->size == 0) { // Note: can't call memcpy(NULL), even w/size +dest->size = 0; // <--- This is important. +return true; // output was just cleared, so they match +} +if (dest->allocation_size < source->size) { +if (!realloc_array(dest, source->size)) { +return false; +} +} +dest->size = source->size; +memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); +// we go through the containers, turning them into shared containers... 
+if (copy_on_write) { +for (int32_t i = 0; i < dest->size; ++i) { +source->containers[i] = get_copy_of_container( +source->containers[i], &source->typecodes[i], copy_on_write); +} +// we do a shallow copy to the other bitmap +memcpy(dest->containers, source->containers, +dest->size * sizeof(container_t *)); +memcpy(dest->typecodes, source->typecodes, +dest->size * sizeof(uint8_t)); +} else { +memcpy(dest->typecodes, source->typecodes, +dest->size * sizeof(uint8_t)); +for (int32_t i = 0; i < dest->size; i++) { +dest->containers[i] = +container_clone(source->containers[i], source->typecodes[i]); +if (dest->containers[i] == NULL) { +for (int32_t j = 0; j < i; j++) { +container_free(dest->containers[j], dest->typecodes[j]); +} +ra_clear_without_containers(dest); +return false; +} +} +} +return true; } void ra_clear_containers(roaring_array_t *ra) { - for (int32_t i = 0; i < ra->size; ++i) { - container_free(ra->containers[i], ra->typecodes[i]); - } +for (int32_t i = 0; i < ra->size; ++i) { +container_free(ra->containers[i], ra->typecodes[i]); +} } void ra_reset(roaring_array_t *ra) { - ra_clear_containers(ra); - ra->size = 0; - ra_shrink_to_fit(ra); +ra_clear_containers(ra); +ra->size = 0; +ra_shrink_to_fit(ra); } void ra_clear_without_containers(roaring_array_t *ra) { - free(ra->containers); // keys and typecodes are allocated with containers - ra->size = 0; - ra->allocation_size = 0; - ra->containers = NULL; - ra->keys = NULL; - ra->typecodes = NULL; +roaring_free(ra->containers); // keys and typecodes are allocated with containers +ra->size = 0; +ra->allocation_size = 0; +ra->containers = NULL; +ra->keys = NULL; +ra->typecodes = NULL; } void ra_clear(roaring_array_t *ra) { - ra_clear_containers(ra); - ra_clear_without_containers(ra); +ra_clear_containers(ra); +ra_clear_without_containers(ra); } bool extend_array(roaring_array_t *ra, int32_t k) { - int32_t desired_size = ra->size + k; - assert(desired_size <= MAX_CONTAINERS); - if (desired_size > ra->allocation_size) 
{ - int32_t new_capacity = - (ra->size < 1024) ? 2 * desired_size : 5 * desired_size / 4; - if (new_capacity > MAX_CONTAINERS) { - new_capacity = MAX_CONTAINERS; - } - - return realloc_array(ra, new_capacity); - } - return true; +int32_t desired_size = ra->size + k; +const int32_t max_containers = 65536; +assert(desired_size <= max_containers); +if (desired_size > ra->allocation_size) { +int32_t new_capacity = +(ra->size < 1024) ? 2 * desired_size : 5 * desired_size / 4; +if (new_capacity > max_containers) { +new_capacity = max_containers; } -void ra_append(roaring_array_t *ra, uint16_t key, void *container, - uint8_t typecode) { - extend_array(ra, 1); - const int32_t pos = ra->size; +return realloc_array(ra, new_capacity); +} +return true; +} - ra->keys[pos] = key; - ra->containers[pos] = container; - ra->typecodes[pos] = typecode; - ra->size++; +void ra_append( +roaring_array_t *ra, uint16_t key, +container_t *c, uint8_t typecode +){ +extend_array(ra, 1); +const int32_t pos = ra->size; + +ra->keys[pos] = key; +ra->containers[pos] = c; +ra->typecodes[pos] = typecode; +ra->size++; } void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t index, bool copy_on_write) { - extend_array(ra, 1); - const int32_t pos = ra->size; - - // old contents is junk not needing freeing - ra->keys[pos] = sa->keys[index]; - // the shared container will be in two bitmaps - if (copy_on_write) { - sa->containers[index] = get_copy_of_container( - sa->containers[index], &sa->typecodes[index], copy_on_write); - ra->containers[pos] = sa->containers[index]; - ra->typecodes[pos] = sa->typecodes[index]; - } else { - ra->containers[pos] = - container_clone(sa->containers[index], sa->typecodes[index]); - ra->typecodes[pos] = sa->typecodes[index]; - } - ra->size++; +uint16_t index, bool copy_on_write) { +extend_array(ra, 1); +const int32_t pos = ra->size; + +// old contents is junk not needing freeing +ra->keys[pos] = sa->keys[index]; +// the shared container will be in two 
bitmaps +if (copy_on_write) { +sa->containers[index] = get_copy_of_container( +sa->containers[index], &sa->typecodes[index], copy_on_write); +ra->containers[pos] = sa->containers[index]; +ra->typecodes[pos] = sa->typecodes[index]; +} else { +ra->containers[pos] = +container_clone(sa->containers[index], sa->typecodes[index]); +ra->typecodes[pos] = sa->typecodes[index]; +} +ra->size++; } void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t stopping_key, bool copy_on_write) { - for (int32_t i = 0; i < sa->size; ++i) { - if (sa->keys[i] >= stopping_key) break; - ra_append_copy(ra, sa, i, copy_on_write); - } +uint16_t stopping_key, bool copy_on_write) { +for (int32_t i = 0; i < sa->size; ++i) { +if (sa->keys[i] >= stopping_key) break; +ra_append_copy(ra, sa, i, copy_on_write); +} } void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write) { - extend_array(ra, end_index - start_index); - for (int32_t i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; - ra->keys[pos] = sa->keys[i]; - if (copy_on_write) { - sa->containers[i] = get_copy_of_container( - sa->containers[i], &sa->typecodes[i], copy_on_write); - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - } else { - ra->containers[pos] = - container_clone(sa->containers[i], sa->typecodes[i]); - ra->typecodes[pos] = sa->typecodes[i]; - } - ra->size++; - } +int32_t start_index, int32_t end_index, +bool copy_on_write) { +extend_array(ra, end_index - start_index); +for (int32_t i = start_index; i < end_index; ++i) { +const int32_t pos = ra->size; +ra->keys[pos] = sa->keys[i]; +if (copy_on_write) { +sa->containers[i] = get_copy_of_container( +sa->containers[i], &sa->typecodes[i], copy_on_write); +ra->containers[pos] = sa->containers[i]; +ra->typecodes[pos] = sa->typecodes[i]; +} else { +ra->containers[pos] = +container_clone(sa->containers[i], sa->typecodes[i]); 
+ra->typecodes[pos] = sa->typecodes[i]; +} +ra->size++; +} } void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t before_start, bool copy_on_write) { - int start_location = ra_get_index(sa, before_start); - if (start_location >= 0) - ++start_location; - else - start_location = -start_location - 1; - ra_append_copy_range(ra, sa, start_location, sa->size, copy_on_write); +uint16_t before_start, bool copy_on_write) { +int start_location = ra_get_index(sa, before_start); +if (start_location >= 0) +++start_location; +else +start_location = -start_location - 1; +ra_append_copy_range(ra, sa, start_location, sa->size, copy_on_write); } void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index) { - extend_array(ra, end_index - start_index); +int32_t start_index, int32_t end_index) { +extend_array(ra, end_index - start_index); - for (int32_t i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; +for (int32_t i = start_index; i < end_index; ++i) { +const int32_t pos = ra->size; - ra->keys[pos] = sa->keys[i]; - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - ra->size++; - } +ra->keys[pos] = sa->keys[i]; +ra->containers[pos] = sa->containers[i]; +ra->typecodes[pos] = sa->typecodes[i]; +ra->size++; +} } void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write) { - extend_array(ra, end_index - start_index); - - for (int32_t i = start_index; i < end_index; ++i) { - const int32_t pos = ra->size; - ra->keys[pos] = sa->keys[i]; - if (copy_on_write) { - sa->containers[i] = get_copy_of_container( - sa->containers[i], &sa->typecodes[i], copy_on_write); - ra->containers[pos] = sa->containers[i]; - ra->typecodes[pos] = sa->typecodes[i]; - } else { - ra->containers[pos] = - container_clone(sa->containers[i], sa->typecodes[i]); - ra->typecodes[pos] = sa->typecodes[i]; - } - ra->size++; - 
} -} +int32_t start_index, int32_t end_index, +bool copy_on_write) { +extend_array(ra, end_index - start_index); -void *ra_get_container(roaring_array_t *ra, uint16_t x, uint8_t *typecode) { - int i = binarySearch(ra->keys, (int32_t)ra->size, x); - if (i < 0) return NULL; - *typecode = ra->typecodes[i]; - return ra->containers[i]; +for (int32_t i = start_index; i < end_index; ++i) { +const int32_t pos = ra->size; +ra->keys[pos] = sa->keys[i]; +if (copy_on_write) { +sa->containers[i] = get_copy_of_container( +sa->containers[i], &sa->typecodes[i], copy_on_write); +ra->containers[pos] = sa->containers[i]; +ra->typecodes[pos] = sa->typecodes[i]; +} else { +ra->containers[pos] = +container_clone(sa->containers[i], sa->typecodes[i]); +ra->typecodes[pos] = sa->typecodes[i]; } - -extern inline void *ra_get_container_at_index(const roaring_array_t *ra, uint16_t i, - uint8_t *typecode); - -void *ra_get_writable_container(roaring_array_t *ra, uint16_t x, - uint8_t *typecode) { - int i = binarySearch(ra->keys, (int32_t)ra->size, x); - if (i < 0) return NULL; - *typecode = ra->typecodes[i]; - return get_writable_copy_if_shared(ra->containers[i], typecode); +ra->size++; } - -void *ra_get_writable_container_at_index(roaring_array_t *ra, uint16_t i, - uint8_t *typecode) { - assert(i < ra->size); - *typecode = ra->typecodes[i]; - return get_writable_copy_if_shared(ra->containers[i], typecode); } -uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) { - return ra->keys[i]; +container_t *ra_get_container( +roaring_array_t *ra, uint16_t x, uint8_t *typecode +){ +int i = binarySearch(ra->keys, (int32_t)ra->size, x); +if (i < 0) return NULL; +*typecode = ra->typecodes[i]; +return ra->containers[i]; } +extern inline container_t *ra_get_container_at_index( +const roaring_array_t *ra, uint16_t i, +uint8_t *typecode); + +extern inline uint16_t ra_get_key_at_index(const roaring_array_t *ra, +uint16_t i); + extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t 
x); extern inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x, - int32_t pos); +int32_t pos); // everything skipped over is freed int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) { - while (pos < ra->size && ra->keys[pos] < x) { - container_free(ra->containers[pos], ra->typecodes[pos]); - ++pos; - } - return pos; -} - -void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key, - void *container, uint8_t typecode) { - extend_array(ra, 1); - // May be an optimization opportunity with DIY memmove - memmove(&(ra->keys[i + 1]), &(ra->keys[i]), - sizeof(uint16_t) * (ra->size - i)); - memmove(&(ra->containers[i + 1]), &(ra->containers[i]), - sizeof(void *) * (ra->size - i)); - memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]), - sizeof(uint8_t) * (ra->size - i)); - ra->keys[i] = key; - ra->containers[i] = container; - ra->typecodes[i] = typecode; - ra->size++; +while (pos < ra->size && ra->keys[pos] < x) { +container_free(ra->containers[pos], ra->typecodes[pos]); +++pos; +} +return pos; +} + +void ra_insert_new_key_value_at( +roaring_array_t *ra, int32_t i, uint16_t key, +container_t *c, uint8_t typecode +){ +extend_array(ra, 1); +// May be an optimization opportunity with DIY memmove +memmove(&(ra->keys[i + 1]), &(ra->keys[i]), +sizeof(uint16_t) * (ra->size - i)); +memmove(&(ra->containers[i + 1]), &(ra->containers[i]), +sizeof(container_t *) * (ra->size - i)); +memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]), +sizeof(uint8_t) * (ra->size - i)); +ra->keys[i] = key; +ra->containers[i] = c; +ra->typecodes[i] = typecode; +ra->size++; } // note: Java routine set things to 0, enabling GC. @@ -10883,23 +20414,23 @@ void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key, // valid containers below ra->size. 
void ra_downsize(roaring_array_t *ra, int32_t new_length) { - assert(new_length <= ra->size); - ra->size = new_length; +assert(new_length <= ra->size); +ra->size = new_length; } void ra_remove_at_index(roaring_array_t *ra, int32_t i) { - memmove(&(ra->containers[i]), &(ra->containers[i + 1]), - sizeof(void *) * (ra->size - i - 1)); - memmove(&(ra->keys[i]), &(ra->keys[i + 1]), - sizeof(uint16_t) * (ra->size - i - 1)); - memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]), - sizeof(uint8_t) * (ra->size - i - 1)); - ra->size--; +memmove(&(ra->containers[i]), &(ra->containers[i + 1]), +sizeof(container_t *) * (ra->size - i - 1)); +memmove(&(ra->keys[i]), &(ra->keys[i + 1]), +sizeof(uint16_t) * (ra->size - i - 1)); +memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]), +sizeof(uint8_t) * (ra->size - i - 1)); +ra->size--; } void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) { - container_free(ra->containers[i], ra->typecodes[i]); - ra_remove_at_index(ra, i); +container_free(ra->containers[i], ra->typecodes[i]); +ra_remove_at_index(ra, i); } // used in inplace andNot only, to slide left the containers from @@ -10908,237 +20439,223 @@ void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) { // downsize. 
// void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, - uint32_t new_begin) { - assert(begin <= end); - assert(new_begin < begin); +uint32_t new_begin) { +assert(begin <= end); +assert(new_begin < begin); - const int range = end - begin; +const int range = end - begin; - // We ensure to previously have freed overwritten containers - // that are not copied elsewhere +// We ensure to previously have freed overwritten containers +// that are not copied elsewhere - memmove(&(ra->containers[new_begin]), &(ra->containers[begin]), - sizeof(void *) * range); - memmove(&(ra->keys[new_begin]), &(ra->keys[begin]), - sizeof(uint16_t) * range); - memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]), - sizeof(uint8_t) * range); +memmove(&(ra->containers[new_begin]), &(ra->containers[begin]), +sizeof(container_t *) * range); +memmove(&(ra->keys[new_begin]), &(ra->keys[begin]), +sizeof(uint16_t) * range); +memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]), +sizeof(uint8_t) * range); } void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) { - if (distance > 0) { - extend_array(ra, distance); - } - int32_t srcpos = ra->size - count; - int32_t dstpos = srcpos + distance; - memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]), - sizeof(uint16_t) * count); - memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]), - sizeof(void *) * count); - memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]), - sizeof(uint8_t) * count); - ra->size += distance; -} - - -size_t ra_size_in_bytes(roaring_array_t *ra) { - size_t cardinality = 0; - size_t tot_len = - 1 /* initial byte type */ + 4 /* tot_len */ + sizeof(roaring_array_t) + - ra->size * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); - for (int32_t i = 0; i < ra->size; i++) { - tot_len += - (container_serialization_len(ra->containers[i], ra->typecodes[i]) + - sizeof(uint16_t)); - cardinality += - container_get_cardinality(ra->containers[i], ra->typecodes[i]); - } - - if 
((cardinality * sizeof(uint32_t) + sizeof(uint32_t)) < tot_len) { - return cardinality * sizeof(uint32_t) + 1 + sizeof(uint32_t); - } - return tot_len; +if (distance > 0) { +extend_array(ra, distance); } +int32_t srcpos = ra->size - count; +int32_t dstpos = srcpos + distance; +memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]), +sizeof(uint16_t) * count); +memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]), +sizeof(container_t *) * count); +memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]), +sizeof(uint8_t) * count); +ra->size += distance; +} + void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) { - size_t ctr = 0; - for (int32_t i = 0; i < ra->size; ++i) { - int num_added = container_to_uint32_array( - ans + ctr, ra->containers[i], ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - ctr += num_added; - } +size_t ctr = 0; +for (int32_t i = 0; i < ra->size; ++i) { +int num_added = container_to_uint32_array( +ans + ctr, ra->containers[i], ra->typecodes[i], +((uint32_t)ra->keys[i]) << 16); +ctr += num_added; +} } bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans) { - size_t ctr = 0; - size_t dtr = 0; - - size_t t_limit = 0; - - bool first = false; - size_t first_skip = 0; - - uint32_t *t_ans = NULL; - size_t cur_len = 0; - - for (int i = 0; i < ra->size; ++i) { - - const void *container = container_unwrap_shared(ra->containers[i], &ra->typecodes[i]); - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE_CODE: - t_limit = ((const bitset_container_t *)container)->cardinality; - break; - case ARRAY_CONTAINER_TYPE_CODE: - t_limit = ((const array_container_t *)container)->cardinality; - break; - case RUN_CONTAINER_TYPE_CODE: - t_limit = run_container_cardinality((const run_container_t *)container); - break; - } - if (ctr + t_limit - 1 >= offset && ctr < offset + limit){ - if (!first){ - //first_skip = t_limit - (ctr + t_limit - offset); - first_skip = offset - ctr; - first = true; - t_ans 
= (uint32_t *)malloc(sizeof(*t_ans) * (first_skip + limit)); - if(t_ans == NULL) { - return false; - } - memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)) ; - cur_len = first_skip + limit; - } - if (dtr + t_limit > cur_len){ - uint32_t * append_ans = (uint32_t *)malloc(sizeof(*append_ans) * (cur_len + t_limit)); - if(append_ans == NULL) { - if(t_ans != NULL) free(t_ans); - return false; - } - memset(append_ans, 0, sizeof(*append_ans) * (cur_len + t_limit)); - cur_len = cur_len + t_limit; - memcpy(append_ans, t_ans, dtr * sizeof(uint32_t)); - free(t_ans); - t_ans = append_ans; - } - switch (ra->typecodes[i]) { - case BITSET_CONTAINER_TYPE_CODE: - container_to_uint32_array( - t_ans + dtr, (const bitset_container_t *)container, ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - break; - case ARRAY_CONTAINER_TYPE_CODE: - container_to_uint32_array( - t_ans + dtr, (const array_container_t *)container, ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - break; - case RUN_CONTAINER_TYPE_CODE: - container_to_uint32_array( - t_ans + dtr, (const run_container_t *)container, ra->typecodes[i], - ((uint32_t)ra->keys[i]) << 16); - break; - } - dtr += t_limit; - } - ctr += t_limit; - if (dtr-first_skip >= limit) break; - } - if(t_ans != NULL) { - memcpy(ans, t_ans+first_skip, limit * sizeof(uint32_t)); - free(t_ans); - } - return true; +size_t ctr = 0; +size_t dtr = 0; + +size_t t_limit = 0; + +bool first = false; +size_t first_skip = 0; + +uint32_t *t_ans = NULL; +size_t cur_len = 0; + +for (int i = 0; i < ra->size; ++i) { + +const container_t *c = container_unwrap_shared( +ra->containers[i], &ra->typecodes[i]); +switch (ra->typecodes[i]) { +case BITSET_CONTAINER_TYPE: +t_limit = (const_CAST_bitset(c))->cardinality; +break; +case ARRAY_CONTAINER_TYPE: +t_limit = (const_CAST_array(c))->cardinality; +break; +case RUN_CONTAINER_TYPE: +t_limit = run_container_cardinality(const_CAST_run(c)); +break; +} +if (ctr + t_limit - 1 >= offset && ctr < offset + limit){ +if (!first){ 
+//first_skip = t_limit - (ctr + t_limit - offset); +first_skip = offset - ctr; +first = true; +t_ans = (uint32_t *)roaring_malloc(sizeof(*t_ans) * (first_skip + limit)); +if(t_ans == NULL) { +return false; +} +memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)) ; +cur_len = first_skip + limit; +} +if (dtr + t_limit > cur_len){ +uint32_t * append_ans = (uint32_t *)roaring_malloc(sizeof(*append_ans) * (cur_len + t_limit)); +if(append_ans == NULL) { +if(t_ans != NULL) roaring_free(t_ans); +return false; +} +memset(append_ans, 0, sizeof(*append_ans) * (cur_len + t_limit)); +cur_len = cur_len + t_limit; +memcpy(append_ans, t_ans, dtr * sizeof(uint32_t)); +roaring_free(t_ans); +t_ans = append_ans; +} +switch (ra->typecodes[i]) { +case BITSET_CONTAINER_TYPE: +container_to_uint32_array( +t_ans + dtr, +const_CAST_bitset(c), ra->typecodes[i], +((uint32_t)ra->keys[i]) << 16); +break; +case ARRAY_CONTAINER_TYPE: +container_to_uint32_array( +t_ans + dtr, +const_CAST_array(c), ra->typecodes[i], +((uint32_t)ra->keys[i]) << 16); +break; +case RUN_CONTAINER_TYPE: +container_to_uint32_array( +t_ans + dtr, +const_CAST_run(c), ra->typecodes[i], +((uint32_t)ra->keys[i]) << 16); +break; +} +dtr += t_limit; +} +ctr += t_limit; +if (dtr-first_skip >= limit) break; +} +if(t_ans != NULL) { +memcpy(ans, t_ans+first_skip, limit * sizeof(uint32_t)); +free(t_ans); +} +return true; } bool ra_has_run_container(const roaring_array_t *ra) { - for (int32_t k = 0; k < ra->size; ++k) { - if (get_container_type(ra->containers[k], ra->typecodes[k]) == - RUN_CONTAINER_TYPE_CODE) - return true; - } - return false; +for (int32_t k = 0; k < ra->size; ++k) { +if (get_container_type(ra->containers[k], ra->typecodes[k]) == +RUN_CONTAINER_TYPE) +return true; +} +return false; } uint32_t ra_portable_header_size(const roaring_array_t *ra) { - if (ra_has_run_container(ra)) { - if (ra->size < - NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets - return 4 + (ra->size + 7) / 8 + 4 * ra->size; - } 
- return 4 + (ra->size + 7) / 8 + - 8 * ra->size; // - 4 because we pack the size with the cookie - } else { - return 4 + 4 + 8 * ra->size; - } +if (ra_has_run_container(ra)) { +if (ra->size < +NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets +return 4 + (ra->size + 7) / 8 + 4 * ra->size; +} +return 4 + (ra->size + 7) / 8 + +8 * ra->size; // - 4 because we pack the size with the cookie +} else { +return 4 + 4 + 8 * ra->size; +} } size_t ra_portable_size_in_bytes(const roaring_array_t *ra) { - size_t count = ra_portable_header_size(ra); +size_t count = ra_portable_header_size(ra); - for (int32_t k = 0; k < ra->size; ++k) { - count += container_size_in_bytes(ra->containers[k], ra->typecodes[k]); - } - return count; +for (int32_t k = 0; k < ra->size; ++k) { +count += container_size_in_bytes(ra->containers[k], ra->typecodes[k]); +} +return count; } +// This function is endian-sensitive. size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) { - char *initbuf = buf; - uint32_t startOffset = 0; - bool hasrun = ra_has_run_container(ra); - if (hasrun) { - uint32_t cookie = SERIAL_COOKIE | ((ra->size - 1) << 16); - memcpy(buf, &cookie, sizeof(cookie)); - buf += sizeof(cookie); - uint32_t s = (ra->size + 7) / 8; - uint8_t *bitmapOfRunContainers = (uint8_t *)calloc(s, 1); - assert(bitmapOfRunContainers != NULL); // todo: handle - for (int32_t i = 0; i < ra->size; ++i) { - if (get_container_type(ra->containers[i], ra->typecodes[i]) == - RUN_CONTAINER_TYPE_CODE) { - bitmapOfRunContainers[i / 8] |= (1 << (i % 8)); - } - } - memcpy(buf, bitmapOfRunContainers, s); - buf += s; - free(bitmapOfRunContainers); - if (ra->size < NO_OFFSET_THRESHOLD) { - startOffset = 4 + 4 * ra->size + s; - } else { - startOffset = 4 + 8 * ra->size + s; - } - } else { // backwards compatibility - uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER; - - memcpy(buf, &cookie, sizeof(cookie)); - buf += sizeof(cookie); - memcpy(buf, &ra->size, sizeof(ra->size)); - buf += 
sizeof(ra->size); - - startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size; - } - for (int32_t k = 0; k < ra->size; ++k) { - memcpy(buf, &ra->keys[k], sizeof(ra->keys[k])); - buf += sizeof(ra->keys[k]); - // get_cardinality returns a value in [1,1<<16], subtracting one - // we get [0,1<<16 - 1] which fits in 16 bits - uint16_t card = (uint16_t)( - container_get_cardinality(ra->containers[k], ra->typecodes[k]) - 1); - memcpy(buf, &card, sizeof(card)); - buf += sizeof(card); - } - if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) { - // writing the containers offsets - for (int32_t k = 0; k < ra->size; k++) { - memcpy(buf, &startOffset, sizeof(startOffset)); - buf += sizeof(startOffset); - startOffset = - startOffset + - container_size_in_bytes(ra->containers[k], ra->typecodes[k]); - } - } - for (int32_t k = 0; k < ra->size; ++k) { - buf += container_write(ra->containers[k], ra->typecodes[k], buf); - } - return buf - initbuf; +char *initbuf = buf; +uint32_t startOffset = 0; +bool hasrun = ra_has_run_container(ra); +if (hasrun) { +uint32_t cookie = SERIAL_COOKIE | ((ra->size - 1) << 16); +memcpy(buf, &cookie, sizeof(cookie)); +buf += sizeof(cookie); +uint32_t s = (ra->size + 7) / 8; +uint8_t *bitmapOfRunContainers = (uint8_t *)roaring_calloc(s, 1); +assert(bitmapOfRunContainers != NULL); // todo: handle +for (int32_t i = 0; i < ra->size; ++i) { +if (get_container_type(ra->containers[i], ra->typecodes[i]) == +RUN_CONTAINER_TYPE) { +bitmapOfRunContainers[i / 8] |= (1 << (i % 8)); +} +} +memcpy(buf, bitmapOfRunContainers, s); +buf += s; +roaring_free(bitmapOfRunContainers); +if (ra->size < NO_OFFSET_THRESHOLD) { +startOffset = 4 + 4 * ra->size + s; +} else { +startOffset = 4 + 8 * ra->size + s; +} +} else { // backwards compatibility +uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER; + +memcpy(buf, &cookie, sizeof(cookie)); +buf += sizeof(cookie); +memcpy(buf, &ra->size, sizeof(ra->size)); +buf += sizeof(ra->size); + +startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size; +} 
+for (int32_t k = 0; k < ra->size; ++k) { +memcpy(buf, &ra->keys[k], sizeof(ra->keys[k])); +buf += sizeof(ra->keys[k]); +// get_cardinality returns a value in [1,1<<16], subtracting one +// we get [0,1<<16 - 1] which fits in 16 bits +uint16_t card = (uint16_t)( +container_get_cardinality(ra->containers[k], ra->typecodes[k]) - 1); +memcpy(buf, &card, sizeof(card)); +buf += sizeof(card); +} +if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) { +// writing the containers offsets +for (int32_t k = 0; k < ra->size; k++) { +memcpy(buf, &startOffset, sizeof(startOffset)); +buf += sizeof(startOffset); +startOffset = +startOffset + +container_size_in_bytes(ra->containers[k], ra->typecodes[k]); +} +} +for (int32_t k = 0; k < ra->size; ++k) { +buf += container_write(ra->containers[k], ra->typecodes[k], buf); +} +return buf - initbuf; } // Quickly checks whether there is a serialized bitmap at the pointer, @@ -11149,438 +20666,449 @@ size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) { // Otherwise, it returns how many bytes are occupied. 
// size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) { - size_t bytestotal = sizeof(int32_t);// for cookie - if(bytestotal > maxbytes) return 0; - uint32_t cookie; - memcpy(&cookie, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - if ((cookie & 0xFFFF) != SERIAL_COOKIE && - cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { - return 0; - } - int32_t size; - - if ((cookie & 0xFFFF) == SERIAL_COOKIE) - size = (cookie >> 16) + 1; - else { - bytestotal += sizeof(int32_t); - if(bytestotal > maxbytes) return 0; - memcpy(&size, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - } - if (size > (1<<16)) { - return 0; // logically impossible - } - char *bitmapOfRunContainers = NULL; - bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; - if (hasrun) { - int32_t s = (size + 7) / 8; - bytestotal += s; - if(bytestotal > maxbytes) return 0; - bitmapOfRunContainers = (char *)buf; - buf += s; - } - bytestotal += size * 2 * sizeof(uint16_t); - if(bytestotal > maxbytes) return 0; - uint16_t *keyscards = (uint16_t *)buf; - buf += size * 2 * sizeof(uint16_t); - if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { - // skipping the offsets - bytestotal += size * 4; - if(bytestotal > maxbytes) return 0; - buf += size * 4; - } - // Reading the containers - for (int32_t k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); - uint32_t thiscard = tmp + 1; - bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); - bool isrun = false; - if(hasrun) { - if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { - isbitmap = false; - isrun = true; - } - } - if (isbitmap) { - size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - bytestotal += containersize; - if(bytestotal > maxbytes) return 0; - buf += containersize; - } else if (isrun) { - bytestotal += sizeof(uint16_t); - if(bytestotal > maxbytes) return 0; - uint16_t n_runs; - memcpy(&n_runs, buf, sizeof(uint16_t)); - buf += sizeof(uint16_t); - size_t containersize = n_runs * 
sizeof(rle16_t); - bytestotal += containersize; - if(bytestotal > maxbytes) return 0; - buf += containersize; - } else { - size_t containersize = thiscard * sizeof(uint16_t); - bytestotal += containersize; - if(bytestotal > maxbytes) return 0; - buf += containersize; - } - } - return bytestotal; -} - - -// this function populates answer from the content of buf (reading up to maxbytes bytes). +size_t bytestotal = sizeof(int32_t);// for cookie +if(bytestotal > maxbytes) return 0; +uint32_t cookie; +memcpy(&cookie, buf, sizeof(int32_t)); +buf += sizeof(uint32_t); +if ((cookie & 0xFFFF) != SERIAL_COOKIE && +cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { +return 0; +} +int32_t size; + +if ((cookie & 0xFFFF) == SERIAL_COOKIE) +size = (cookie >> 16) + 1; +else { +bytestotal += sizeof(int32_t); +if(bytestotal > maxbytes) return 0; +memcpy(&size, buf, sizeof(int32_t)); +buf += sizeof(uint32_t); +} +if (size > (1<<16)) { +return 0; +} +char *bitmapOfRunContainers = NULL; +bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; +if (hasrun) { +int32_t s = (size + 7) / 8; +bytestotal += s; +if(bytestotal > maxbytes) return 0; +bitmapOfRunContainers = (char *)buf; +buf += s; +} +bytestotal += size * 2 * sizeof(uint16_t); +if(bytestotal > maxbytes) return 0; +uint16_t *keyscards = (uint16_t *)buf; +buf += size * 2 * sizeof(uint16_t); +if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { +// skipping the offsets +bytestotal += size * 4; +if(bytestotal > maxbytes) return 0; +buf += size * 4; +} +// Reading the containers +for (int32_t k = 0; k < size; ++k) { +uint16_t tmp; +memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); +uint32_t thiscard = tmp + 1; +bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); +bool isrun = false; +if(hasrun) { +if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { +isbitmap = false; +isrun = true; +} +} +if (isbitmap) { +size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); +bytestotal += containersize; +if(bytestotal > maxbytes) return 0; +buf += 
containersize; +} else if (isrun) { +bytestotal += sizeof(uint16_t); +if(bytestotal > maxbytes) return 0; +uint16_t n_runs; +memcpy(&n_runs, buf, sizeof(uint16_t)); +buf += sizeof(uint16_t); +size_t containersize = n_runs * sizeof(rle16_t); +bytestotal += containersize; +if(bytestotal > maxbytes) return 0; +buf += containersize; +} else { +size_t containersize = thiscard * sizeof(uint16_t); +bytestotal += containersize; +if(bytestotal > maxbytes) return 0; +buf += containersize; +} +} +return bytestotal; +} + +// This function populates answer from the content of buf (reading up to maxbytes bytes). // The function returns false if a properly serialized bitmap cannot be found. -// if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes. +// If it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes. +// +// This function is endian-sensitive. bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) { - *readbytes = sizeof(int32_t);// for cookie - if(*readbytes > maxbytes) { - fprintf(stderr, "Ran out of bytes while reading first 4 bytes.\n"); - return false; - } - uint32_t cookie; - memcpy(&cookie, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - if ((cookie & 0xFFFF) != SERIAL_COOKIE && - cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { - fprintf(stderr, "I failed to find one of the right cookies. 
Found %" PRIu32 "\n", - cookie); - return false; - } - int32_t size; - - if ((cookie & 0xFFFF) == SERIAL_COOKIE) - size = (cookie >> 16) + 1; - else { - *readbytes += sizeof(int32_t); - if(*readbytes > maxbytes) { - fprintf(stderr, "Ran out of bytes while reading second part of the cookie.\n"); - return false; - } - memcpy(&size, buf, sizeof(int32_t)); - buf += sizeof(uint32_t); - } - if (size > (1<<16)) { - fprintf(stderr, "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n", - size); - return false; // logically impossible - } - const char *bitmapOfRunContainers = NULL; - bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; - if (hasrun) { - int32_t s = (size + 7) / 8; - *readbytes += s; - if(*readbytes > maxbytes) {// data is corrupted? - fprintf(stderr, "Ran out of bytes while reading run bitmap.\n"); - return false; - } - bitmapOfRunContainers = buf; - buf += s; - } - uint16_t *keyscards = (uint16_t *)buf; +*readbytes = sizeof(int32_t);// for cookie +if(*readbytes > maxbytes) { +// Ran out of bytes while reading first 4 bytes. +return false; +} +uint32_t cookie; +memcpy(&cookie, buf, sizeof(int32_t)); +buf += sizeof(uint32_t); +if ((cookie & 0xFFFF) != SERIAL_COOKIE && +cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { +// "I failed to find one of the right cookies. +return false; +} +int32_t size; + +if ((cookie & 0xFFFF) == SERIAL_COOKIE) +size = (cookie >> 16) + 1; +else { +*readbytes += sizeof(int32_t); +if(*readbytes > maxbytes) { +// Ran out of bytes while reading second part of the cookie. +return false; +} +memcpy(&size, buf, sizeof(int32_t)); +buf += sizeof(uint32_t); +} +if (size < 0) { +// You cannot have a negative number of containers, the data must be corrupted. +return false; +} +if (size > (1<<16)) { +// You cannot have so many containers, the data must be corrupted. 
+return false; +} +const char *bitmapOfRunContainers = NULL; +bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; +if (hasrun) { +int32_t s = (size + 7) / 8; +*readbytes += s; +if(*readbytes > maxbytes) {// data is corrupted? +// Ran out of bytes while reading run bitmap. +return false; +} +bitmapOfRunContainers = buf; +buf += s; +} +uint16_t *keyscards = (uint16_t *)buf; + +*readbytes += size * 2 * sizeof(uint16_t); +if(*readbytes > maxbytes) { +// Ran out of bytes while reading key-cardinality array. +return false; +} +buf += size * 2 * sizeof(uint16_t); + +bool is_ok = ra_init_with_capacity(answer, size); +if (!is_ok) { +// Failed to allocate memory for roaring array. Bailing out. +return false; +} + +for (int32_t k = 0; k < size; ++k) { +uint16_t tmp; +memcpy(&tmp, keyscards + 2*k, sizeof(tmp)); +answer->keys[k] = tmp; +} +if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { +*readbytes += size * 4; +if(*readbytes > maxbytes) {// data is corrupted? +// Ran out of bytes while reading offsets. +ra_clear(answer);// we need to clear the containers already allocated, and the roaring array +return false; +} + +// skipping the offsets +buf += size * 4; +} +// Reading the containers +for (int32_t k = 0; k < size; ++k) { +uint16_t tmp; +memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); +uint32_t thiscard = tmp + 1; +bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); +bool isrun = false; +if(hasrun) { +if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { +isbitmap = false; +isrun = true; +} +} +if (isbitmap) { +// we check that the read is allowed +size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); +*readbytes += containersize; +if(*readbytes > maxbytes) { +// Running out of bytes while reading a bitset container. 
+ra_clear(answer);// we need to clear the containers already allocated, and the roaring array +return false; +} +// it is now safe to read +bitset_container_t *c = bitset_container_create(); +if(c == NULL) {// memory allocation failure +// Failed to allocate memory for a bitset container. +ra_clear(answer);// we need to clear the containers already allocated, and the roaring array +return false; +} +answer->size++; +buf += bitset_container_read(thiscard, c, buf); +answer->containers[k] = c; +answer->typecodes[k] = BITSET_CONTAINER_TYPE; +} else if (isrun) { +// we check that the read is allowed +*readbytes += sizeof(uint16_t); +if(*readbytes > maxbytes) { +// Running out of bytes while reading a run container (header). +ra_clear(answer);// we need to clear the containers already allocated, and the roaring array +return false; +} +uint16_t n_runs; +memcpy(&n_runs, buf, sizeof(uint16_t)); +size_t containersize = n_runs * sizeof(rle16_t); +*readbytes += containersize; +if(*readbytes > maxbytes) {// data is corrupted? +// Running out of bytes while reading a run container. +ra_clear(answer);// we need to clear the containers already allocated, and the roaring array +return false; +} +// it is now safe to read + +run_container_t *c = run_container_create(); +if(c == NULL) {// memory allocation failure +// Failed to allocate memory for a run container. +ra_clear(answer);// we need to clear the containers already allocated, and the roaring array +return false; +} +answer->size++; +buf += run_container_read(thiscard, c, buf); +answer->containers[k] = c; +answer->typecodes[k] = RUN_CONTAINER_TYPE; +} else { +// we check that the read is allowed +size_t containersize = thiscard * sizeof(uint16_t); +*readbytes += containersize; +if(*readbytes > maxbytes) {// data is corrupted? +// Running out of bytes while reading an array container. 
+ra_clear(answer);// we need to clear the containers already allocated, and the roaring array +return false; +} +// it is now safe to read +array_container_t *c = +array_container_create_given_capacity(thiscard); +if(c == NULL) {// memory allocation failure +// Failed to allocate memory for an array container. +ra_clear(answer);// we need to clear the containers already allocated, and the roaring array +return false; +} +answer->size++; +buf += array_container_read(thiscard, c, buf); +answer->containers[k] = c; +answer->typecodes[k] = ARRAY_CONTAINER_TYPE; +} +} +return true; +} + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace internal { +#endif +/* end file src/roaring_array.c */ +/* begin file src/roaring_priority_queue.c */ - *readbytes += size * 2 * sizeof(uint16_t); - if(*readbytes > maxbytes) { - fprintf(stderr, "Ran out of bytes while reading key-cardinality array.\n"); - return false; - } - buf += size * 2 * sizeof(uint16_t); - bool is_ok = ra_init_with_capacity(answer, size); - if (!is_ok) { - fprintf(stderr, "Failed to allocate memory for roaring array. Bailing out.\n"); - return false; - } +#ifdef __cplusplus +using namespace ::roaring::internal; - for (int32_t k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2*k, sizeof(tmp)); - answer->keys[k] = tmp; - } - if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { - *readbytes += size * 4; - if(*readbytes > maxbytes) {// data is corrupted? 
- fprintf(stderr, "Ran out of bytes while reading offsets.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - - // skipping the offsets - buf += size * 4; - } - // Reading the containers - for (int32_t k = 0; k < size; ++k) { - uint16_t tmp; - memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); - uint32_t thiscard = tmp + 1; - bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); - bool isrun = false; - if(hasrun) { - if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { - isbitmap = false; - isrun = true; - } - } - if (isbitmap) { - // we check that the read is allowed - size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - *readbytes += containersize; - if(*readbytes > maxbytes) { - fprintf(stderr, "Running out of bytes while reading a bitset container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - // it is now safe to read - bitset_container_t *c = bitset_container_create(); - if(c == NULL) {// memory allocation failure - fprintf(stderr, "Failed to allocate memory for a bitset container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - answer->size++; - buf += bitset_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = BITSET_CONTAINER_TYPE_CODE; - } else if (isrun) { - // we check that the read is allowed - *readbytes += sizeof(uint16_t); - if(*readbytes > maxbytes) { - fprintf(stderr, "Running out of bytes while reading a run container (header).\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - uint16_t n_runs; - memcpy(&n_runs, buf, sizeof(uint16_t)); - size_t containersize = n_runs * sizeof(rle16_t); - *readbytes += containersize; - if(*readbytes > maxbytes) {// data is corrupted? 
- fprintf(stderr, "Running out of bytes while reading a run container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - // it is now safe to read - - run_container_t *c = run_container_create(); - if(c == NULL) {// memory allocation failure - fprintf(stderr, "Failed to allocate memory for a run container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - answer->size++; - buf += run_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = RUN_CONTAINER_TYPE_CODE; - } else { - // we check that the read is allowed - size_t containersize = thiscard * sizeof(uint16_t); - *readbytes += containersize; - if(*readbytes > maxbytes) {// data is corrupted? - fprintf(stderr, "Running out of bytes while reading an array container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - // it is now safe to read - array_container_t *c = - array_container_create_given_capacity(thiscard); - if(c == NULL) {// memory allocation failure - fprintf(stderr, "Failed to allocate memory for an array container.\n"); - ra_clear(answer);// we need to clear the containers already allocated, and the roaring array - return false; - } - answer->size++; - buf += array_container_read(thiscard, c, buf); - answer->containers[k] = c; - answer->typecodes[k] = ARRAY_CONTAINER_TYPE_CODE; - } - } - return true; -} -/* end file src/roaring_array.c */ -/* begin file src/roaring_priority_queue.c */ +extern "C" { namespace roaring { namespace api { +#endif struct roaring_pq_element_s { - uint64_t size; - bool is_temporary; - roaring_bitmap_t *bitmap; +uint64_t size; +bool is_temporary; +roaring_bitmap_t *bitmap; }; typedef struct roaring_pq_element_s roaring_pq_element_t; struct roaring_pq_s { - roaring_pq_element_t *elements; - uint64_t size; +roaring_pq_element_t 
*elements; +uint64_t size; }; typedef struct roaring_pq_s roaring_pq_t; static inline bool compare(roaring_pq_element_t *t1, roaring_pq_element_t *t2) { - return t1->size < t2->size; +return t1->size < t2->size; } static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) { - uint64_t i = pq->size; - pq->elements[pq->size++] = *t; - while (i > 0) { - uint64_t p = (i - 1) >> 1; - roaring_pq_element_t ap = pq->elements[p]; - if (!compare(t, &ap)) break; - pq->elements[i] = ap; - i = p; - } - pq->elements[i] = *t; +uint64_t i = pq->size; +pq->elements[pq->size++] = *t; +while (i > 0) { +uint64_t p = (i - 1) >> 1; +roaring_pq_element_t ap = pq->elements[p]; +if (!compare(t, &ap)) break; +pq->elements[i] = ap; +i = p; +} +pq->elements[i] = *t; } static void pq_free(roaring_pq_t *pq) { - free(pq->elements); - pq->elements = NULL; // paranoid - free(pq); +roaring_free(pq); } static void percolate_down(roaring_pq_t *pq, uint32_t i) { - uint32_t size = (uint32_t)pq->size; - uint32_t hsize = size >> 1; - roaring_pq_element_t ai = pq->elements[i]; - while (i < hsize) { - uint32_t l = (i << 1) + 1; - uint32_t r = l + 1; - roaring_pq_element_t bestc = pq->elements[l]; - if (r < size) { - if (compare(pq->elements + r, &bestc)) { - l = r; - bestc = pq->elements[r]; - } - } - if (!compare(&bestc, &ai)) { - break; - } - pq->elements[i] = bestc; - i = l; - } - pq->elements[i] = ai; +uint32_t size = (uint32_t)pq->size; +uint32_t hsize = size >> 1; +roaring_pq_element_t ai = pq->elements[i]; +while (i < hsize) { +uint32_t l = (i << 1) + 1; +uint32_t r = l + 1; +roaring_pq_element_t bestc = pq->elements[l]; +if (r < size) { +if (compare(pq->elements + r, &bestc)) { +l = r; +bestc = pq->elements[r]; +} +} +if (!compare(&bestc, &ai)) { +break; +} +pq->elements[i] = bestc; +i = l; +} +pq->elements[i] = ai; } static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) { - roaring_pq_t *answer = (roaring_pq_t *)malloc(sizeof(roaring_pq_t)); - answer->elements = - 
(roaring_pq_element_t *)malloc(sizeof(roaring_pq_element_t) * length); - answer->size = length; - for (uint32_t i = 0; i < length; i++) { - answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i]; - answer->elements[i].is_temporary = false; - answer->elements[i].size = - roaring_bitmap_portable_size_in_bytes(arr[i]); - } - for (int32_t i = (length >> 1); i >= 0; i--) { - percolate_down(answer, i); - } - return answer; +size_t alloc_size = sizeof(roaring_pq_t) + sizeof(roaring_pq_element_t) * length; +roaring_pq_t *answer = (roaring_pq_t *)roaring_malloc(alloc_size); +answer->elements = (roaring_pq_element_t *)(answer + 1); +answer->size = length; +for (uint32_t i = 0; i < length; i++) { +answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i]; +answer->elements[i].is_temporary = false; +answer->elements[i].size = +roaring_bitmap_portable_size_in_bytes(arr[i]); +} +for (int32_t i = (length >> 1); i >= 0; i--) { +percolate_down(answer, i); +} +return answer; } static roaring_pq_element_t pq_poll(roaring_pq_t *pq) { - roaring_pq_element_t ans = *pq->elements; - if (pq->size > 1) { - pq->elements[0] = pq->elements[--pq->size]; - percolate_down(pq, 0); - } else - --pq->size; - // memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size; - return ans; +roaring_pq_element_t ans = *pq->elements; +if (pq->size > 1) { +pq->elements[0] = pq->elements[--pq->size]; +percolate_down(pq, 0); +} else +--pq->size; +// memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size; +return ans; } // this function consumes and frees the inputs static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1, - roaring_bitmap_t *x2) { - uint8_t container_result_type = 0; - const int length1 = ra_get_size(&x1->high_low_container), - length2 = ra_get_size(&x2->high_low_container); - if (0 == length1) { - roaring_bitmap_free(x1); - return x2; - } - if (0 == length2) { - roaring_bitmap_free(x2); - return x1; - } - uint32_t 
neededcap = length1 > length2 ? length2 : length1; - roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); - int pos1 = 0, pos2 = 0; - uint8_t container_type_1, container_type_2; - uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - while (true) { - if (s1 == s2) { - // todo: unsharing can be inefficient as it may create a clone where - // none - // is needed, but it has the benefit of being easy to reason about. - ra_unshare_container_at_index(&x1->high_low_container, pos1); - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - &container_type_1); - assert(container_type_1 != SHARED_CONTAINER_TYPE_CODE); - ra_unshare_container_at_index(&x2->high_low_container, pos2); - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - assert(container_type_2 != SHARED_CONTAINER_TYPE_CODE); - void *c; - - if ((container_type_2 == BITSET_CONTAINER_TYPE_CODE) && - (container_type_1 != BITSET_CONTAINER_TYPE_CODE)) { - c = container_lazy_ior(c2, container_type_2, c1, - container_type_1, - &container_result_type); - container_free(c1, container_type_1); - if (c != c2) { - container_free(c2, container_type_2); - } - } else { - c = container_lazy_ior(c1, container_type_1, c2, - container_type_2, - &container_result_type); - container_free(c2, container_type_2); - if (c != c1) { - container_free(c1, container_type_1); - } - } - // since we assume that the initial containers are non-empty, the - // result here - // can only be non-empty - ra_append(&answer->high_low_container, s1, c, - container_result_type); - ++pos1; - ++pos2; - if (pos1 == length1) break; - if (pos2 == length2) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - - } else if (s1 < s2) { // s1 < s2 - void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, - 
&container_type_1); - ra_append(&answer->high_low_container, s1, c1, container_type_1); - pos1++; - if (pos1 == length1) break; - s1 = ra_get_key_at_index(&x1->high_low_container, pos1); - - } else { // s1 > s2 - void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, - &container_type_2); - ra_append(&answer->high_low_container, s2, c2, container_type_2); - pos2++; - if (pos2 == length2) break; - s2 = ra_get_key_at_index(&x2->high_low_container, pos2); - } - } - if (pos1 == length1) { - ra_append_move_range(&answer->high_low_container, - &x2->high_low_container, pos2, length2); - } else if (pos2 == length2) { - ra_append_move_range(&answer->high_low_container, - &x1->high_low_container, pos1, length1); - } - ra_clear_without_containers(&x1->high_low_container); - ra_clear_without_containers(&x2->high_low_container); - free(x1); - free(x2); - return answer; +roaring_bitmap_t *x2) { +uint8_t result_type = 0; +const int length1 = ra_get_size(&x1->high_low_container), +length2 = ra_get_size(&x2->high_low_container); +if (0 == length1) { +roaring_bitmap_free(x1); +return x2; +} +if (0 == length2) { +roaring_bitmap_free(x2); +return x1; +} +uint32_t neededcap = length1 > length2 ? length2 : length1; +roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); +int pos1 = 0, pos2 = 0; +uint8_t type1, type2; +uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +while (true) { +if (s1 == s2) { +// todo: unsharing can be inefficient as it may create a clone where +// none +// is needed, but it has the benefit of being easy to reason about. 
+ +ra_unshare_container_at_index(&x1->high_low_container, pos1); +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +assert(type1 != SHARED_CONTAINER_TYPE); + +ra_unshare_container_at_index(&x2->high_low_container, pos2); +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +assert(type2 != SHARED_CONTAINER_TYPE); + +container_t *c; + +if ((type2 == BITSET_CONTAINER_TYPE) && +(type1 != BITSET_CONTAINER_TYPE) +){ +c = container_lazy_ior(c2, type2, c1, type1, &result_type); +container_free(c1, type1); +if (c != c2) { +container_free(c2, type2); +} +} else { +c = container_lazy_ior(c1, type1, c2, type2, &result_type); +container_free(c2, type2); +if (c != c1) { +container_free(c1, type1); +} +} +// since we assume that the initial containers are non-empty, the +// result here +// can only be non-empty +ra_append(&answer->high_low_container, s1, c, result_type); +++pos1; +++pos2; +if (pos1 == length1) break; +if (pos2 == length2) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + +} else if (s1 < s2) { // s1 < s2 +container_t *c1 = ra_get_container_at_index( +&x1->high_low_container, pos1, &type1); +ra_append(&answer->high_low_container, s1, c1, type1); +pos1++; +if (pos1 == length1) break; +s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + +} else { // s1 > s2 +container_t *c2 = ra_get_container_at_index( +&x2->high_low_container, pos2, &type2); +ra_append(&answer->high_low_container, s2, c2, type2); +pos2++; +if (pos2 == length2) break; +s2 = ra_get_key_at_index(&x2->high_low_container, pos2); +} +} +if (pos1 == length1) { +ra_append_move_range(&answer->high_low_container, +&x2->high_low_container, pos2, length2); +} else if (pos2 == length2) { +ra_append_move_range(&answer->high_low_container, +&x1->high_low_container, pos1, length1); +} +ra_clear_without_containers(&x1->high_low_container); 
+ra_clear_without_containers(&x2->high_low_container); +roaring_free(x1); +roaring_free(x2); +return answer; } /** @@ -11590,51 +21118,55 @@ static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1, * result. */ roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, - const roaring_bitmap_t **x) { - if (number == 0) { - return roaring_bitmap_create(); - } - if (number == 1) { - return roaring_bitmap_copy(x[0]); - } - roaring_pq_t *pq = create_pq(x, number); - while (pq->size > 1) { - roaring_pq_element_t x1 = pq_poll(pq); - roaring_pq_element_t x2 = pq_poll(pq); - - if (x1.is_temporary && x2.is_temporary) { - roaring_bitmap_t *newb = - lazy_or_from_lazy_inputs(x1.bitmap, x2.bitmap); - // should normally return a fresh new bitmap *except* that - // it can return x1.bitmap or x2.bitmap in degenerate cases - bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap)); - uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); - roaring_pq_element_t newelement = { - .size = bsize, .is_temporary = temporary, .bitmap = newb}; - pq_add(pq, &newelement); - } else if (x2.is_temporary) { - roaring_bitmap_lazy_or_inplace(x2.bitmap, x1.bitmap, false); - x2.size = roaring_bitmap_portable_size_in_bytes(x2.bitmap); - pq_add(pq, &x2); - } else if (x1.is_temporary) { - roaring_bitmap_lazy_or_inplace(x1.bitmap, x2.bitmap, false); - x1.size = roaring_bitmap_portable_size_in_bytes(x1.bitmap); - - pq_add(pq, &x1); - } else { - roaring_bitmap_t *newb = - roaring_bitmap_lazy_or(x1.bitmap, x2.bitmap, false); - uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); - roaring_pq_element_t newelement = { - .size = bsize, .is_temporary = true, .bitmap = newb}; - - pq_add(pq, &newelement); - } - } - roaring_pq_element_t X = pq_poll(pq); - roaring_bitmap_t *answer = X.bitmap; - roaring_bitmap_repair_after_lazy(answer); - pq_free(pq); - return answer; -} +const roaring_bitmap_t **x) { +if (number == 0) { +return roaring_bitmap_create(); +} +if (number == 1) { 
+return roaring_bitmap_copy(x[0]); +} +roaring_pq_t *pq = create_pq(x, number); +while (pq->size > 1) { +roaring_pq_element_t x1 = pq_poll(pq); +roaring_pq_element_t x2 = pq_poll(pq); + +if (x1.is_temporary && x2.is_temporary) { +roaring_bitmap_t *newb = +lazy_or_from_lazy_inputs(x1.bitmap, x2.bitmap); +// should normally return a fresh new bitmap *except* that +// it can return x1.bitmap or x2.bitmap in degenerate cases +bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap)); +uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); +roaring_pq_element_t newelement = { +.size = bsize, .is_temporary = temporary, .bitmap = newb}; +pq_add(pq, &newelement); +} else if (x2.is_temporary) { +roaring_bitmap_lazy_or_inplace(x2.bitmap, x1.bitmap, false); +x2.size = roaring_bitmap_portable_size_in_bytes(x2.bitmap); +pq_add(pq, &x2); +} else if (x1.is_temporary) { +roaring_bitmap_lazy_or_inplace(x1.bitmap, x2.bitmap, false); +x1.size = roaring_bitmap_portable_size_in_bytes(x1.bitmap); + +pq_add(pq, &x1); +} else { +roaring_bitmap_t *newb = +roaring_bitmap_lazy_or(x1.bitmap, x2.bitmap, false); +uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); +roaring_pq_element_t newelement = { +.size = bsize, .is_temporary = true, .bitmap = newb}; + +pq_add(pq, &newelement); +} +} +roaring_pq_element_t X = pq_poll(pq); +roaring_bitmap_t *answer = X.bitmap; +roaring_bitmap_repair_after_lazy(answer); +pq_free(pq); +return answer; +} + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace api { +#endif /* end file src/roaring_priority_queue.c */ diff --git a/roaring.h b/roaring.h index 9515bd2..6c2ff00 100644 --- a/roaring.h +++ b/roaring.h @@ -1,135 +1,365 @@ -/* auto-generated on Sat Jun 27 12:40:38 2020. Do not edit! */ +// !!! DO NOT EDIT - THIS IS AN AUTO-GENERATED FILE !!! +// Created by amalgamation.sh on 2023-09-27T16:30:23Z + +/* + * The CRoaring project is under a dual license (Apache/MIT). 
+ * Users of the library may choose one or the other license. + */ +/* + * Copyright 2016-2022 The CRoaring authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 + */ +/* + * MIT License + * + * Copyright 2016-2022 The CRoaring authors + * + * Permission is hereby granted, free of charge, to any + * person obtaining a copy of this software and associated + * documentation files (the "Software"), to deal in the + * Software without restriction, including without + * limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software + * is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice + * shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF + * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED + * TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT + * SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * SPDX-License-Identifier: MIT + */ + /* begin file include/roaring/roaring_version.h */ -// /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand -#ifndef ROARING_INCLUDE_ROARING_VERSION -#define ROARING_INCLUDE_ROARING_VERSION -#define ROARING_VERSION = 0.2.66, -enum { - ROARING_VERSION_MAJOR = 0, - ROARING_VERSION_MINOR = 2, - ROARING_VERSION_REVISION = 66 -}; -#endif // ROARING_INCLUDE_ROARING_VERSION +// /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand +#ifndef ROARING_INCLUDE_ROARING_VERSION +#define ROARING_INCLUDE_ROARING_VERSION +#define ROARING_VERSION "2.0.2" +enum { + ROARING_VERSION_MAJOR = 2, + ROARING_VERSION_MINOR = 0, + ROARING_VERSION_REVISION = 2 +}; +#endif // ROARING_INCLUDE_ROARING_VERSION /* end file include/roaring/roaring_version.h */ +/* begin file include/roaring/roaring_types.h */ +/* + Typedefs used by various components +*/ + +#ifndef ROARING_TYPES_H +#define ROARING_TYPES_H + +#include +#include + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace api { +#endif + + +/** + * When building .c files as C++, there's added compile-time checking if the + * container types are derived from a `container_t` base class. So long as + * such a base class is empty, the struct will behave compatibly with C structs + * despite the derivation. This is due to the Empty Base Class Optimization: + * + * https://en.cppreference.com/w/cpp/language/ebo + * + * But since C isn't namespaced, taking `container_t` globally might collide + * with other projects. 
So roaring.h uses ROARING_CONTAINER_T, while internal + * code #undefs that after declaring `typedef ROARING_CONTAINER_T container_t;` + */ +#if defined(__cplusplus) +extern "C++" { + struct container_s {}; + } + #define ROARING_CONTAINER_T ::roaring::api::container_s +#else +#define ROARING_CONTAINER_T void // no compile-time checking +#endif + +#define ROARING_FLAG_COW UINT8_C(0x1) +#define ROARING_FLAG_FROZEN UINT8_C(0x2) + +/** + * Roaring arrays are array-based key-value pairs having containers as values + * and 16-bit integer keys. A roaring bitmap might be implemented as such. + */ + +// parallel arrays. Element sizes quite different. +// Alternative is array +// of structs. Which would have better +// cache performance through binary searches? + +typedef struct roaring_array_s { + int32_t size; + int32_t allocation_size; + ROARING_CONTAINER_T **containers; // Use container_t in non-API files! + uint16_t *keys; + uint8_t *typecodes; + uint8_t flags; +} roaring_array_t; + + +typedef bool (*roaring_iterator)(uint32_t value, void *param); +typedef bool (*roaring_iterator64)(uint64_t value, void *param); + +/** +* (For advanced users.) +* The roaring_statistics_t can be used to collect detailed statistics about +* the composition of a roaring bitmap. 
+*/ +typedef struct roaring_statistics_s { + uint32_t n_containers; /* number of containers */ + + uint32_t n_array_containers; /* number of array containers */ + uint32_t n_run_containers; /* number of run containers */ + uint32_t n_bitset_containers; /* number of bitmap containers */ + + uint32_t + n_values_array_containers; /* number of values in array containers */ + uint32_t n_values_run_containers; /* number of values in run containers */ + uint32_t + n_values_bitset_containers; /* number of values in bitmap containers */ + + uint32_t n_bytes_array_containers; /* number of allocated bytes in array + containers */ + uint32_t n_bytes_run_containers; /* number of allocated bytes in run + containers */ + uint32_t n_bytes_bitset_containers; /* number of allocated bytes in bitmap + containers */ + + uint32_t + max_value; /* the maximal value, undefined if cardinality is zero */ + uint32_t + min_value; /* the minimal value, undefined if cardinality is zero */ + uint64_t sum_value; /* the sum of all values (could be used to compute + average) */ + + uint64_t cardinality; /* total number of values stored in the bitmap */ + + // and n_values_arrays, n_values_rle, n_values_bitmap +} roaring_statistics_t; + +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace api { +#endif + +#endif /* ROARING_TYPES_H */ +/* end file include/roaring/roaring_types.h */ /* begin file include/roaring/portability.h */ /* * portability.h * */ +/** + * All macros should be prefixed with either CROARING or ROARING. + * The library uses both ROARING_... + * as well as CROAIRING_ as prefixes. The ROARING_ prefix is for + * macros that are provided by the build system or that are closely + * related to the format. The header macros may also use ROARING_. + * The CROARING_ prefix is for internal macros that a user is unlikely + * to ever interact with. 
+ */ + #ifndef INCLUDE_PORTABILITY_H_ #define INCLUDE_PORTABILITY_H_ #ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif +#define _GNU_SOURCE 1 +#endif // _GNU_SOURCE #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS 1 +#endif // __STDC_FORMAT_MACROS + +#ifdef _MSC_VER +#define CROARING_VISUAL_STUDIO 1 +/** + * We want to differentiate carefully between + * clang under visual studio and regular visual + * studio. + */ +#ifdef __clang__ +// clang under visual studio +#define CROARING_CLANG_VISUAL_STUDIO 1 +#else +// just regular visual studio (best guess) +#define CROARING_REGULAR_VISUAL_STUDIO 1 +#endif // __clang__ +#endif // _MSC_VER +#ifndef CROARING_VISUAL_STUDIO +#define CROARING_VISUAL_STUDIO 0 +#endif +#ifndef CROARING_CLANG_VISUAL_STUDIO +#define CROARING_CLANG_VISUAL_STUDIO 0 +#endif +#ifndef CROARING_REGULAR_VISUAL_STUDIO +#define CROARING_REGULAR_VISUAL_STUDIO 0 #endif -#if !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L) -#define _POSIX_C_SOURCE 200809L +#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L) +#undef _POSIX_C_SOURCE #endif + +#ifndef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 200809L +#endif // !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L) #if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) #define _XOPEN_SOURCE 700 +#endif // !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) + +#ifdef __illumos__ +#define __EXTENSIONS__ #endif #include #include #include // will provide posix_memalign with _POSIX_C_SOURCE as defined above -#if !(defined(__APPLE__)) && !(defined(__FreeBSD__)) +#ifdef __GLIBC__ #include // this should never be needed but there are some reports that it is needed. #endif - -#if defined(_MSC_VER) && !defined(__clang__) && !defined(_WIN64) && !defined(ROARING_ACK_32BIT) -#pragma message( \ - "You appear to be attempting a 32-bit build under Visual Studio. 
We recommend a 64-bit build instead.") +#ifdef __cplusplus +extern "C" { // portability definitions are in global scope, not a namespace #endif #if defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ != 8 #error This code assumes 64-bit long longs (by use of the GCC intrinsics). Your system is not currently supported. #endif -#if defined(_MSC_VER) +#if CROARING_REGULAR_VISUAL_STUDIO +#ifndef __restrict__ #define __restrict__ __restrict -#endif +#endif // __restrict__ +#endif // CROARING_REGULAR_VISUAL_STUDIO + -#ifndef DISABLE_X64 // some users may want to compile as if they did not have - // an x64 processor - -/////////////////////// -/// We support X64 hardware in the following manner: -/// -/// if IS_X64 is defined then we have at least SSE and SSE2 -/// (All Intel processors sold in the recent past have at least SSE and SSE2 support, -/// going back to the Pentium 4.) -/// -/// if USESSE4 is defined then we assume at least SSE4.2, SSE4.1, -/// SSSE3, SSE3... + IS_X64 -/// if USEAVX is defined, then we assume AVX2, AVX + USESSE4 -/// -/// So if you have hardware that supports AVX but not AVX2, then "USEAVX" -/// won't be enabled. -/// If you have hardware that supports SSE4.1, but not SSE4.2, then USESSE4 -/// won't be defined. -////////////////////// - -// unless DISABLEAVX was defined, if we have __AVX2__, we enable AVX -#if (!defined(USEAVX)) && (!defined(DISABLEAVX)) && (defined(__AVX2__)) -#define USEAVX + +#if defined(__x86_64__) || defined(_M_X64) +// we have an x64 processor +#define CROARING_IS_X64 1 + +#if defined(_MSC_VER) && (_MSC_VER < 1910) +// Old visual studio systems won't support AVX2 well. 
+#undef CROARING_IS_X64 #endif -// if we have __SSE4_2__, we enable SSE4 -#if (defined(__POPCNT__)) && (defined(__SSE4_2__)) -#define USESSE4 +#if defined(__clang_major__) && (__clang_major__<= 8) && !defined(__AVX2__) +// Older versions of clang have a bug affecting us +// https://stackoverflow.com/questions/57228537/how-does-one-use-pragma-clang-attribute-push-with-c-namespaces +#undef CROARING_IS_X64 #endif -#if defined(USEAVX) || defined(__x86_64__) || defined(_M_X64) -// we have an x64 processor -#define IS_X64 +#ifdef ROARING_DISABLE_X64 +#undef CROARING_IS_X64 +#endif // we include the intrinsic header -#ifndef _MSC_VER +#if !CROARING_REGULAR_VISUAL_STUDIO /* Non-Microsoft C/C++-compatible compiler */ #include // on some recent GCC, this will declare posix_memalign -#endif -#endif -#if !defined(USENEON) && !defined(DISABLENEON) && defined(__ARM_NEON) -# define USENEON + + +#if CROARING_CLANG_VISUAL_STUDIO + +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. 
+ */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include +#if _MSC_VER >= 1920 +// Important: we need the AVX-512 headers: +#include +#include +#include +#include +#include +#include +#include +#include +#endif // _MSC_VER >= 1920 +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + + +#endif // CROARING_REGULAR_VISUAL_STUDIO +#endif // defined(__x86_64__) || defined(_M_X64) + +#if !defined(CROARING_USENEON) && !defined(DISABLENEON) && defined(__ARM_NEON) +# define CROARING_USENEON #endif -#if defined(USENEON) +#if defined(CROARING_USENEON) # include #endif -#ifndef _MSC_VER +#if !CROARING_REGULAR_VISUAL_STUDIO /* Non-Microsoft C/C++-compatible compiler, assumes that it supports inline * assembly */ -#define ROARING_INLINE_ASM -#endif - -#ifdef USEAVX -#define USESSE4 // if we have AVX, then we have SSE4 -#define USE_BMI // we assume that AVX2 and BMI go hand and hand -#define USEAVX2FORDECODING // optimization -// vector operations should work on not just AVX -#define ROARING_VECTOR_OPERATIONS_ENABLED // vector unions (optimization) -#endif +#define CROARING_INLINE_ASM 1 +#endif // _MSC_VER -#endif // DISABLE_X64 - -#ifdef _MSC_VER +#if CROARING_REGULAR_VISUAL_STUDIO /* Microsoft C/C++-compatible compiler */ #include #ifndef __clang__ // if one compiles with MSVC *with* clang, then these // intrinsics are defined!!! +#define CROARING_INTRINSICS 1 // sadly there is no way to check whether we are missing these intrinsics // specifically. 
-/* wrappers for Visual Studio built-ins that look like gcc built-ins */ +/* wrappers for Visual Studio built-ins that look like gcc built-ins __builtin_ctzll */ /* result might be undefined when input_num is zero */ -static inline int __builtin_ctzll(unsigned long long input_num) { +inline int roaring_trailing_zeroes(unsigned long long input_num) { unsigned long index; #ifdef _WIN64 // highly recommended!!! _BitScanForward64(&index, input_num); @@ -140,12 +370,13 @@ static inline int __builtin_ctzll(unsigned long long input_num) { _BitScanForward(&index, (uint32_t)(input_num >> 32)); index += 32; } -#endif +#endif // _WIN64 return index; } +/* wrappers for Visual Studio built-ins that look like gcc built-ins __builtin_clzll */ /* result might be undefined when input_num is zero */ -static inline int __builtin_clzll(unsigned long long input_num) { +inline int roaring_leading_zeroes(unsigned long long input_num) { unsigned long index; #ifdef _WIN64 // highly recommended!!! _BitScanReverse64(&index, input_num); @@ -156,82 +387,33 @@ static inline int __builtin_clzll(unsigned long long input_num) { } else { _BitScanReverse(&index, (uint32_t)(input_num)); } -#endif +#endif // _WIN64 return 63 - index; } -/* result might be undefined when input_num is zero */ -#ifdef USESSE4 -/* POPCNT support was added to processors around the release of SSE4.2 */ -/* USESSE4 flag guarantees POPCNT support */ -static inline int __builtin_popcountll(unsigned long long input_num) { -#ifdef _WIN64 // highly recommended!!! - return (int)__popcnt64(input_num); -#else // if we must support 32-bit Windows - return (int)(__popcnt((uint32_t)input_num) + - __popcnt((uint32_t)(input_num >> 32))); -#endif -} -#else -/* software implementation avoids POPCNT */ -static inline int __builtin_popcountll(unsigned long long input_num) { - const uint64_t m1 = 0x5555555555555555; //binary: 0101... - const uint64_t m2 = 0x3333333333333333; //binary: 00110011.. 
- const uint64_t m4 = 0x0f0f0f0f0f0f0f0f; //binary: 4 zeros, 4 ones ... - const uint64_t h01 = 0x0101010101010101; //the sum of 256 to the power of 0,1,2,3... - - input_num -= (input_num >> 1) & m1; - input_num = (input_num & m2) + ((input_num >> 2) & m2); - input_num = (input_num + (input_num >> 4)) & m4; - return (input_num * h01) >> 56; -} -#endif - /* Use #define so this is effective even under /Ob0 (no inline) */ -#define __builtin_unreachable() __assume(0) -#endif - -#endif +#define roaring_unreachable __assume(0) +#endif // __clang__ -// without the following, we get lots of warnings about posix_memalign -#ifndef __cplusplus -extern int posix_memalign(void **__memptr, size_t __alignment, size_t __size); -#endif //__cplusplus // C++ does not have a well defined signature +#endif // CROARING_REGULAR_VISUAL_STUDIO -// portable version of posix_memalign -static inline void *roaring_bitmap_aligned_malloc(size_t alignment, size_t size) { - void *p; -#ifdef _MSC_VER - p = _aligned_malloc(size, alignment); -#elif defined(__MINGW32__) || defined(__MINGW64__) - p = __mingw_aligned_malloc(size, alignment); -#else - // somehow, if this is used before including "x86intrin.h", it creates an - // implicit defined warning. 
- if (posix_memalign(&p, alignment, size) != 0) return NULL; -#endif - return p; -} - -static inline void roaring_bitmap_aligned_free(void *memblock) { -#ifdef _MSC_VER - _aligned_free(memblock); -#elif defined(__MINGW32__) || defined(__MINGW64__) - __mingw_aligned_free(memblock); -#else - free(memblock); +#ifndef CROARING_INTRINSICS +#define CROARING_INTRINSICS 1 +#define roaring_unreachable __builtin_unreachable() +static inline int roaring_trailing_zeroes(unsigned long long input_num) { return __builtin_ctzll(input_num); } +static inline int roaring_leading_zeroes(unsigned long long input_num) { return __builtin_clzll(input_num); } #endif -} -#if defined(_MSC_VER) +#if CROARING_REGULAR_VISUAL_STUDIO #define ALIGNED(x) __declspec(align(x)) -#else -#if defined(__GNUC__) +#elif defined(__GNUC__) || defined(__clang__) #define ALIGNED(x) __attribute__((aligned(x))) -#endif +#else +#warning "Warning. Unrecognized compiler." +#define ALIGNED(x) #endif -#ifdef __GNUC__ +#if defined(__GNUC__) || defined(__clang__) #define WARN_UNUSED __attribute__((warn_unused_result)) #else #define WARN_UNUSED @@ -239,6240 +421,629 @@ static inline void roaring_bitmap_aligned_free(void *memblock) { #define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100) -static inline int hamming(uint64_t x) { -#ifdef USESSE4 - return (int) _mm_popcnt_u64(x); +#ifdef CROARING_USENEON +// we can always compute the popcount fast. 
+#elif (defined(_M_ARM) || defined(_M_ARM64)) && ((defined(_WIN64) || defined(_WIN32)) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO)
+// we will need this function:
+static inline int roaring_hamming_backup(uint64_t x) {
+  uint64_t c1 = UINT64_C(0x5555555555555555);
+  uint64_t c2 = UINT64_C(0x3333333333333333);
+  uint64_t c4 = UINT64_C(0x0F0F0F0F0F0F0F0F);
+  x -= (x >> 1) & c1;
+  x = (( x >> 2) & c2) + (x & c2); x=(x +(x>>4))&c4;
+  x *= UINT64_C(0x0101010101010101);
+  return x >> 56;
+}
+#endif
+
+
+static inline int roaring_hamming(uint64_t x) {
+#if defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO
+  #ifdef CROARING_USENEON
+  return vaddv_u8(vcnt_u8(vcreate_u8(x)));
+#elif defined(_M_ARM64)
+  return roaring_hamming_backup(x);
+  // (int) _CountOneBits64(x); is unavailable
+#else // _M_ARM64
+  return (int) __popcnt64(x);
+#endif // _M_ARM64
+#elif defined(_WIN32) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO
+  #ifdef _M_ARM
+  return roaring_hamming_backup(x);
+  // _CountOneBits is unavailable
+#else // _M_ARM
+  return (int) __popcnt(( unsigned int)x) + (int) __popcnt(( unsigned int)(x>>32));
+#endif // _M_ARM
 #else
-    // won't work under visual studio, but hopeful we have _mm_popcnt_u64 in
-    // many cases
     return __builtin_popcountll(x);
 #endif
 }
 
 #ifndef UINT64_C
 #define UINT64_C(c) (c##ULL)
-#endif
+#endif // UINT64_C
 
 #ifndef UINT32_C
 #define UINT32_C(c) (c##UL)
-#endif
+#endif // UINT32_C
 
-#endif /* INCLUDE_PORTABILITY_H_ */
-/* end file include/roaring/portability.h */
-/* begin file include/roaring/containers/perfparameters.h */
-#ifndef PERFPARAMETERS_H_
-#define PERFPARAMETERS_H_
+#ifdef __cplusplus
+} // extern "C" {
+#endif // __cplusplus
 
-#include
-/**
-During lazy computations, we can transform array containers into bitset
-containers as
-long as we can expect them to have ARRAY_LAZY_LOWERBOUND values.
-*/ -enum { ARRAY_LAZY_LOWERBOUND = 1024 }; +// this is almost standard? +#undef STRINGIFY_IMPLEMENTATION_ +#undef STRINGIFY +#define STRINGIFY_IMPLEMENTATION_(a) #a +#define STRINGIFY(a) STRINGIFY_IMPLEMENTATION_(a) -/* default initial size of a run container - setting it to zero delays the malloc.*/ -enum { RUN_DEFAULT_INIT_SIZE = 0 }; +// Our fast kernels require 64-bit systems. +// +// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. +// Furthermore, the number of SIMD registers is reduced. +// +// On 32-bit ARM, we would have smaller registers. +// +// The library should still have the fallback kernel. It is +// slower, but it should run everywhere. -/* default initial size of an array container - setting it to zero delays the malloc */ -enum { ARRAY_DEFAULT_INIT_SIZE = 0 }; +// +// Enable valid runtime implementations, and select CROARING_BUILTIN_IMPLEMENTATION +// -/* automatic bitset conversion during lazy or */ -#ifndef LAZY_OR_BITSET_CONVERSION -#define LAZY_OR_BITSET_CONVERSION true +// We are going to use runtime dispatch. +#if CROARING_IS_X64 +#ifdef __clang__ +// clang does not have GCC push pop +// warning: clang attribute push can't be used within a namespace in clang up +// til 8.0 so CROARING_TARGET_REGION and CROARING_UNTARGET_REGION must be *outside* of a +// namespace. +#define CROARING_TARGET_REGION(T) \ + _Pragma(STRINGIFY( \ + clang attribute push(__attribute__((target(T))), apply_to = function))) +#define CROARING_UNTARGET_REGION _Pragma("clang attribute pop") +#elif defined(__GNUC__) +// GCC is easier +#define CROARING_TARGET_REGION(T) \ + _Pragma("GCC push_options") _Pragma(STRINGIFY(GCC target(T))) +#define CROARING_UNTARGET_REGION _Pragma("GCC pop_options") +#endif // clang then gcc + +#endif // CROARING_IS_X64 + +// Default target region macros don't do anything. 
+#ifndef CROARING_TARGET_REGION +#define CROARING_TARGET_REGION(T) +#define CROARING_UNTARGET_REGION #endif -/* automatically attempt to convert a bitset to a full run during lazy - * evaluation */ -#ifndef LAZY_OR_BITSET_CONVERSION_TO_FULL -#define LAZY_OR_BITSET_CONVERSION_TO_FULL true + +#define CROARING_TARGET_AVX2 CROARING_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") +#define CROARING_TARGET_AVX512 CROARING_TARGET_REGION("avx2,bmi,bmi2,pclmul,lzcnt,popcnt,avx512f,avx512dq,avx512bw,avx512vbmi2,avx512bitalg,avx512vpopcntdq") +#define CROARING_UNTARGET_AVX2 CROARING_UNTARGET_REGION +#define CROARING_UNTARGET_AVX512 CROARING_UNTARGET_REGION + +#ifdef __AVX2__ +// No need for runtime dispatching. +// It is unnecessary and harmful to old clang to tag regions. +#undef CROARING_TARGET_AVX2 +#define CROARING_TARGET_AVX2 +#undef CROARING_UNTARGET_AVX2 +#define CROARING_UNTARGET_AVX2 #endif -/* automatically attempt to convert a bitset to a full run */ -#ifndef OR_BITSET_CONVERSION_TO_FULL -#define OR_BITSET_CONVERSION_TO_FULL true +#if defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VBMI2__) && defined(__AVX512BITALG__) && defined(__AVX512VPOPCNTDQ__) +// No need for runtime dispatching. +// It is unnecessary and harmful to old clang to tag regions. 
+#undef CROARING_TARGET_AVX512 +#define CROARING_TARGET_AVX512 +#undef CROARING_UNTARGET_AVX512 +#define CROARING_UNTARGET_AVX512 #endif +// Allow unaligned memory access +#if defined(__GNUC__) || defined(__clang__) +#define ALLOW_UNALIGNED __attribute__((no_sanitize("alignment"))) +#else +#define ALLOW_UNALIGNED #endif -/* end file include/roaring/containers/perfparameters.h */ -/* begin file include/roaring/array_util.h */ -#ifndef ARRAY_UTIL_H -#define ARRAY_UTIL_H -#include // for size_t -#include +#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) +#define CROARING_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#elif defined(_WIN32) +#define CROARING_IS_BIG_ENDIAN 0 +#else +#if defined(__APPLE__) || defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ +#include +#elif defined(sun) || defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__) +#include +#else // defined(__APPLE__) || defined(__FreeBSD__) +#ifdef __has_include +#if __has_include() + #include + #endif //__has_include() +#endif //__has_include -/* - * Good old binary search. - * Assumes that array is sorted, has logarithmic complexity. - * if the result is x, then: - * if ( x>0 ) you have array[x] = ikey - * if ( x<0 ) then inserting ikey at position -x-1 in array (insuring that array[-x-1]=ikey) - * keys the array sorted. - */ -inline int32_t binarySearch(const uint16_t *array, int32_t lenarray, - uint16_t ikey) { - int32_t low = 0; - int32_t high = lenarray - 1; - while (low <= high) { - int32_t middleIndex = (low + high) >> 1; - uint16_t middleValue = array[middleIndex]; - if (middleValue < ikey) { - low = middleIndex + 1; - } else if (middleValue > ikey) { - high = middleIndex - 1; - } else { - return middleIndex; - } - } - return -(low + 1); -} +#endif // defined(__APPLE__) || defined(__FreeBSD__) -/** - * Galloping search - * Assumes that array is sorted, has logarithmic complexity. 
- * if the result is x, then if x = length, you have that all values in array between pos and length
- * are smaller than min.
- * otherwise returns the first index x such that array[x] >= min.
- */
-static inline int32_t advanceUntil(const uint16_t *array, int32_t pos,
-                                   int32_t length, uint16_t min) {
-    int32_t lower = pos + 1;
-    if ((lower >= length) || (array[lower] >= min)) {
-        return lower;
-    }
+#endif // defined(__APPLE__) || defined(__FreeBSD__)
 
-    int32_t spansize = 1;
+#if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__)
+#define CROARING_IS_BIG_ENDIAN 0
+#endif
 
-    while ((lower + spansize < length) && (array[lower + spansize] < min)) {
-        spansize <<= 1;
-    }
-    int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define CROARING_IS_BIG_ENDIAN 0
+#else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define CROARING_IS_BIG_ENDIAN 1
+#endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#endif
 
+// Defines for the possible CROARING atomic implementations
+#define CROARING_ATOMIC_IMPL_NONE 1
+#define CROARING_ATOMIC_IMPL_CPP 2
+#define CROARING_ATOMIC_IMPL_C 3
+#define CROARING_ATOMIC_IMPL_C_WINDOWS 4
+
+// If the use has forced a specific implementation, use that, otherwise,
+// figure out the best implementation we can use.
+#if !defined(CROARING_ATOMIC_IMPL) +#if defined(__cplusplus) && __cplusplus >= 201103L +#ifdef __has_include + #if __has_include() + #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP + #endif //__has_include() + #else + // We lack __has_include to check: + #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP + #endif //__has_include +#elif __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__) +#define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C +#elif CROARING_REGULAR_VISUAL_STUDIO +// https://www.technetworkhub.com/c11-atomics-in-visual-studio-2022-version-17/ + #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C_WINDOWS +#endif +#endif // !defined(CROARING_ATOMIC_IMPL) - if (array[upper] == min) { - return upper; - } - if (array[upper] < min) { - // means - // array - // has no - // item - // >= min - // pos = array.length; - return length; - } +#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C +#include +typedef _Atomic(uint32_t) croaring_refcount_t; - // we know that the next-smallest span was too small - lower += (spansize >> 1); - - int32_t mid = 0; - while (lower + 1 != upper) { - mid = (lower + upper) >> 1; - if (array[mid] == min) { - return mid; - } else if (array[mid] < min) { - lower = mid; - } else { - upper = mid; - } +static inline void croaring_refcount_inc(croaring_refcount_t *val) { + // Increasing the reference counter can always be done with + // memory_order_relaxed: New references to an object can only be formed from + // an existing reference, and passing an existing reference from one thread to + // another must already provide any required synchronization. + atomic_fetch_add_explicit(val, 1, memory_order_relaxed); +} + +static inline bool croaring_refcount_dec(croaring_refcount_t *val) { + // It is important to enforce any possible access to the object in one thread + // (through an existing reference) to happen before deleting the object in a + // different thread. 
This is achieved by a "release" operation after dropping + // a reference (any access to the object through this reference must obviously + // happened before), and an "acquire" operation before deleting the object. + bool is_zero = atomic_fetch_sub_explicit(val, 1, memory_order_release) == 1; + if (is_zero) { + atomic_thread_fence(memory_order_acquire); } - return upper; + return is_zero; } -/** - * Returns number of elements which are less then $ikey. - * Array elements must be unique and sorted. - */ -static inline int32_t count_less(const uint16_t *array, int32_t lenarray, - uint16_t ikey) { - if (lenarray == 0) return 0; - int32_t pos = binarySearch(array, lenarray, ikey); - return pos >= 0 ? pos : -(pos+1); +static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { + return atomic_load_explicit(val, memory_order_relaxed); } +#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP +#include +typedef std::atomic croaring_refcount_t; -/** - * Returns number of elements which are greater then $ikey. - * Array elements must be unique and sorted. - */ -static inline int32_t count_greater(const uint16_t *array, int32_t lenarray, - uint16_t ikey) { - if (lenarray == 0) return 0; - int32_t pos = binarySearch(array, lenarray, ikey); - if (pos >= 0) { - return lenarray - (pos+1); - } else { - return lenarray - (-pos-1); - } +static inline void croaring_refcount_inc(croaring_refcount_t *val) { + val->fetch_add(1, std::memory_order_relaxed); } -/** - * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions - * Optimized by D. Lemire on May 3rd 2013 - * - * C should have capacity greater than the minimum of s_1 and s_b + 8 - * where 8 is sizeof(__m128i)/sizeof(uint16_t). 
- */ -int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, - const uint16_t *__restrict__ B, size_t s_b, - uint16_t *C); +static inline bool croaring_refcount_dec(croaring_refcount_t *val) { + // See above comments on the c11 atomic implementation for memory ordering + bool is_zero = val->fetch_sub(1, std::memory_order_release) == 1; + if (is_zero) { + std::atomic_thread_fence(std::memory_order_acquire); + } + return is_zero; +} -/** - * Compute the cardinality of the intersection using SSE4 instructions - */ -int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, - size_t s_a, - const uint16_t *__restrict__ B, - size_t s_b); - -/* Computes the intersection between one small and one large set of uint16_t. - * Stores the result into buffer and return the number of elements. */ -int32_t intersect_skewed_uint16(const uint16_t *smallarray, size_t size_s, - const uint16_t *largearray, size_t size_l, - uint16_t *buffer); - -/* Computes the size of the intersection between one small and one large set of - * uint16_t. */ -int32_t intersect_skewed_uint16_cardinality(const uint16_t *smallarray, - size_t size_s, - const uint16_t *largearray, - size_t size_l); - - -/* Check whether the size of the intersection between one small and one large set of uint16_t is non-zero. */ -bool intersect_skewed_uint16_nonempty(const uint16_t *smallarray, size_t size_s, - const uint16_t *largearray, size_t size_l); -/** - * Generic intersection function. - */ -int32_t intersect_uint16(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB, uint16_t *out); -/** - * Compute the size of the intersection (generic). 
- */ -int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB); +static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { + return val->load(std::memory_order_relaxed); +} +#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C_WINDOWS +#include +#pragma intrinsic(_InterlockedIncrement) +#pragma intrinsic(_InterlockedDecrement) -/** - * Checking whether the size of the intersection is non-zero. - */ -bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA, - const uint16_t *B, const size_t lenB); -/** - * Generic union function. - */ -size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, - size_t size_2, uint16_t *buffer); +// _InterlockedIncrement and _InterlockedDecrement take a (signed) long, and +// overflow is defined to wrap, so we can pretend it is a uint32_t for our case +typedef volatile long croaring_refcount_t; -/** - * Generic XOR function. - */ -int32_t xor_uint16(const uint16_t *array_1, int32_t card_1, - const uint16_t *array_2, int32_t card_2, uint16_t *out); +static inline void croaring_refcount_inc(croaring_refcount_t *val) { + _InterlockedIncrement(val); +} -/** - * Generic difference function (ANDNOT). - */ -int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2, - int length2, uint16_t *a_out); +static inline bool croaring_refcount_dec(croaring_refcount_t *val) { + return _InterlockedDecrement(val) == 0; +} -/** - * Generic intersection function. - */ -size_t intersection_uint32(const uint32_t *A, const size_t lenA, - const uint32_t *B, const size_t lenB, uint32_t *out); +static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { + // Per https://learn.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access + // > Simple reads and writes to properly-aligned 32-bit variables are atomic + // > operations. 
In other words, you will not end up with only one portion
+    // > of the variable updated; all bits are updated in an atomic fashion.
+    return *val;
+}
+#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_NONE
+#include <assert.h>
+typedef uint32_t croaring_refcount_t;
 
-/**
- * Generic intersection function, returns just the cardinality.
- */
-size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
-                                const uint32_t *B, const size_t lenB);
+static inline void croaring_refcount_inc(croaring_refcount_t *val) {
+    *val += 1;
+}
 
-/**
- * Generic union function.
- */
-size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2,
-                    size_t size_2, uint32_t *buffer);
+static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
+    assert(*val > 0);
+    *val -= 1;
+    return *val == 0;
+}
 
-/**
- * A fast SSE-based union function.
- */
-uint32_t union_vector16(const uint16_t *__restrict__ set_1, uint32_t size_1,
-                        const uint16_t *__restrict__ set_2, uint32_t size_2,
-                        uint16_t *__restrict__ buffer);
-/**
- * A fast SSE-based XOR function.
- */
-uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
-                      const uint16_t *__restrict__ array2, uint32_t length2,
-                      uint16_t *__restrict__ output);
+static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
+    return *val;
+}
+#else
+#error "Unknown atomic implementation"
+#endif
 
-/**
- * A fast SSE-based difference function.
- */
-int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
-                            const uint16_t *__restrict__ B, size_t s_b,
-                            uint16_t *C);
-/**
- * Generic union function, returns just the cardinality.
- */ -size_t union_uint32_card(const uint32_t *set_1, size_t size_1, - const uint32_t *set_2, size_t size_2); - -/** -* combines union_uint16 and union_vector16 optimally -*/ -size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, - size_t size_2, uint16_t *buffer); - - -bool memequals(const void *s1, const void *s2, size_t n); - -#endif -/* end file include/roaring/array_util.h */ -/* begin file include/roaring/roaring_types.h */ -/* - Typedefs used by various components -*/ - -#ifndef ROARING_TYPES_H -#define ROARING_TYPES_H - -typedef bool (*roaring_iterator)(uint32_t value, void *param); -typedef bool (*roaring_iterator64)(uint64_t value, void *param); - -/** -* (For advanced users.) -* The roaring_statistics_t can be used to collect detailed statistics about -* the composition of a roaring bitmap. -*/ -typedef struct roaring_statistics_s { - uint32_t n_containers; /* number of containers */ - - uint32_t n_array_containers; /* number of array containers */ - uint32_t n_run_containers; /* number of run containers */ - uint32_t n_bitset_containers; /* number of bitmap containers */ - - uint32_t - n_values_array_containers; /* number of values in array containers */ - uint32_t n_values_run_containers; /* number of values in run containers */ - uint32_t - n_values_bitset_containers; /* number of values in bitmap containers */ - - uint32_t n_bytes_array_containers; /* number of allocated bytes in array - containers */ - uint32_t n_bytes_run_containers; /* number of allocated bytes in run - containers */ - uint32_t n_bytes_bitset_containers; /* number of allocated bytes in bitmap - containers */ - - uint32_t - max_value; /* the maximal value, undefined if cardinality is zero */ - uint32_t - min_value; /* the minimal value, undefined if cardinality is zero */ - uint64_t sum_value; /* the sum of all values (could be used to compute - average) */ - - uint64_t cardinality; /* total number of values stored in the bitmap */ - - // and 
n_values_arrays, n_values_rle, n_values_bitmap -} roaring_statistics_t; - -#endif /* ROARING_TYPES_H */ -/* end file include/roaring/roaring_types.h */ -/* begin file include/roaring/utilasm.h */ -/* - * utilasm.h - * - */ - -#ifndef INCLUDE_UTILASM_H_ -#define INCLUDE_UTILASM_H_ - - -#if defined(USE_BMI) & defined(ROARING_INLINE_ASM) -#define ASMBITMANIPOPTIMIZATION // optimization flag - -#define ASM_SHIFT_RIGHT(srcReg, bitsReg, destReg) \ - __asm volatile("shrx %1, %2, %0" \ - : "=r"(destReg) \ - : /* write */ \ - "r"(bitsReg), /* read only */ \ - "r"(srcReg) /* read only */ \ - ) - -#define ASM_INPLACESHIFT_RIGHT(srcReg, bitsReg) \ - __asm volatile("shrx %1, %0, %0" \ - : "+r"(srcReg) \ - : /* read/write */ \ - "r"(bitsReg) /* read only */ \ - ) - -#define ASM_SHIFT_LEFT(srcReg, bitsReg, destReg) \ - __asm volatile("shlx %1, %2, %0" \ - : "=r"(destReg) \ - : /* write */ \ - "r"(bitsReg), /* read only */ \ - "r"(srcReg) /* read only */ \ - ) -// set bit at position testBit within testByte to 1 and -// copy cmovDst to cmovSrc if that bit was previously clear -#define ASM_SET_BIT_INC_WAS_CLEAR(testByte, testBit, count) \ - __asm volatile( \ - "bts %2, %0\n" \ - "sbb $-1, %1\n" \ - : "+r"(testByte), /* read/write */ \ - "+r"(count) \ - : /* read/write */ \ - "r"(testBit) /* read only */ \ - ) - -#define ASM_CLEAR_BIT_DEC_WAS_SET(testByte, testBit, count) \ - __asm volatile( \ - "btr %2, %0\n" \ - "sbb $0, %1\n" \ - : "+r"(testByte), /* read/write */ \ - "+r"(count) \ - : /* read/write */ \ - "r"(testBit) /* read only */ \ - ) - -#define ASM_BT64(testByte, testBit, count) \ - __asm volatile( \ - "bt %2,%1\n" \ - "sbb %0,%0" /*could use setb */ \ - : "=r"(count) \ - : /* write */ \ - "r"(testByte), /* read only */ \ - "r"(testBit) /* read only */ \ - ) - -#endif // USE_BMI -#endif /* INCLUDE_UTILASM_H_ */ -/* end file include/roaring/utilasm.h */ -/* begin file include/roaring/bitset_util.h */ -#ifndef BITSET_UTIL_H -#define BITSET_UTIL_H +// We need portability.h to 
be included first, +// but we also always want isadetection.h to be +// included (right after). +// See https://github.com/RoaringBitmap/CRoaring/issues/394 +// There is no scenario where we want portability.h to +// be included, but not isadetection.h: the latter is a +// strict requirement. +#endif /* INCLUDE_PORTABILITY_H_ */ +/* end file include/roaring/portability.h */ +/* begin file include/roaring/bitset/bitset.h */ +#ifndef CBITSET_BITSET_H +#define CBITSET_BITSET_H + +// For compatibility with MSVC with the use of `restrict` +#if (__STDC_VERSION__ >= 199901L) || \ + (defined(__GNUC__) && defined(__STDC_VERSION__)) +#define CBITSET_RESTRICT restrict +#else +#define CBITSET_RESTRICT +#endif // (__STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && +// defined(__STDC_VERSION__ )) +#include #include - - -/* - * Set all bits in indexes [begin,end) to true. - */ -static inline void bitset_set_range(uint64_t *bitmap, uint32_t start, - uint32_t end) { - if (start == end) return; - uint32_t firstword = start / 64; - uint32_t endword = (end - 1) / 64; - if (firstword == endword) { - bitmap[firstword] |= ((~UINT64_C(0)) << (start % 64)) & - ((~UINT64_C(0)) >> ((~end + 1) % 64)); - return; - } - bitmap[firstword] |= (~UINT64_C(0)) << (start % 64); - for (uint32_t i = firstword + 1; i < endword; i++) bitmap[i] = ~UINT64_C(0); - bitmap[endword] |= (~UINT64_C(0)) >> ((~end + 1) % 64); -} - - -/* - * Find the cardinality of the bitset in [begin,begin+lenminusone] - */ -static inline int bitset_lenrange_cardinality(uint64_t *bitmap, uint32_t start, - uint32_t lenminusone) { - uint32_t firstword = start / 64; - uint32_t endword = (start + lenminusone) / 64; - if (firstword == endword) { - return hamming(bitmap[firstword] & - ((~UINT64_C(0)) >> ((63 - lenminusone) % 64)) - << (start % 64)); - } - int answer = hamming(bitmap[firstword] & ((~UINT64_C(0)) << (start % 64))); - for (uint32_t i = firstword + 1; i < endword; i++) { - answer += hamming(bitmap[i]); - } - answer += - 
hamming(bitmap[endword] & - (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64)); - return answer; -} - -/* - * Check whether the cardinality of the bitset in [begin,begin+lenminusone] is 0 - */ -static inline bool bitset_lenrange_empty(uint64_t *bitmap, uint32_t start, - uint32_t lenminusone) { - uint32_t firstword = start / 64; - uint32_t endword = (start + lenminusone) / 64; - if (firstword == endword) { - return (bitmap[firstword] & ((~UINT64_C(0)) >> ((63 - lenminusone) % 64)) - << (start % 64)) == 0; - } - if(((bitmap[firstword] & ((~UINT64_C(0)) << (start%64)))) != 0) return false; - for (uint32_t i = firstword + 1; i < endword; i++) { - if(bitmap[i] != 0) return false; - } - if((bitmap[endword] & (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64)) != 0) return false; - return true; -} - - -/* - * Set all bits in indexes [begin,begin+lenminusone] to true. - */ -static inline void bitset_set_lenrange(uint64_t *bitmap, uint32_t start, - uint32_t lenminusone) { - uint32_t firstword = start / 64; - uint32_t endword = (start + lenminusone) / 64; - if (firstword == endword) { - bitmap[firstword] |= ((~UINT64_C(0)) >> ((63 - lenminusone) % 64)) - << (start % 64); - return; - } - uint64_t temp = bitmap[endword]; - bitmap[firstword] |= (~UINT64_C(0)) << (start % 64); - for (uint32_t i = firstword + 1; i < endword; i += 2) - bitmap[i] = bitmap[i + 1] = ~UINT64_C(0); - bitmap[endword] = - temp | (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64); -} - -/* - * Flip all the bits in indexes [begin,end). - */ -static inline void bitset_flip_range(uint64_t *bitmap, uint32_t start, - uint32_t end) { - if (start == end) return; - uint32_t firstword = start / 64; - uint32_t endword = (end - 1) / 64; - bitmap[firstword] ^= ~((~UINT64_C(0)) << (start % 64)); - for (uint32_t i = firstword; i < endword; i++) bitmap[i] = ~bitmap[i]; - bitmap[endword] ^= ((~UINT64_C(0)) >> ((~end + 1) % 64)); -} - -/* - * Set all bits in indexes [begin,end) to false. 
- */ -static inline void bitset_reset_range(uint64_t *bitmap, uint32_t start, - uint32_t end) { - if (start == end) return; - uint32_t firstword = start / 64; - uint32_t endword = (end - 1) / 64; - if (firstword == endword) { - bitmap[firstword] &= ~(((~UINT64_C(0)) << (start % 64)) & - ((~UINT64_C(0)) >> ((~end + 1) % 64))); - return; - } - bitmap[firstword] &= ~((~UINT64_C(0)) << (start % 64)); - for (uint32_t i = firstword + 1; i < endword; i++) bitmap[i] = UINT64_C(0); - bitmap[endword] &= ~((~UINT64_C(0)) >> ((~end + 1) % 64)); -} - -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out", values start at "base". - * - * The "out" pointer should be sufficient to store the actual number of bits - * set. - * - * Returns how many values were actually decoded. - * - * This function should only be expected to be faster than - * bitset_extract_setbits - * when the density of the bitset is high. - * - * This function uses AVX2 decoding. - */ -size_t bitset_extract_setbits_avx2(uint64_t *bitset, size_t length, void *vout, - size_t outcapacity, uint32_t base); - -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out", values start at "base". - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. - */ -size_t bitset_extract_setbits(uint64_t *bitset, size_t length, void *vout, - uint32_t base); - -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out" as 16-bit integers, values start at "base" (can - *be set to zero) - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. - * - * This function should only be expected to be faster than - *bitset_extract_setbits_uint16 - * when the density of the bitset is high. 
- * - * This function uses SSE decoding. - */ -size_t bitset_extract_setbits_sse_uint16(const uint64_t *bitset, size_t length, - uint16_t *out, size_t outcapacity, - uint16_t base); - -/* - * Given a bitset containing "length" 64-bit words, write out the position - * of all the set bits to "out", values start at "base" - * (can be set to zero) - * - * The "out" pointer should be sufficient to store the actual number of bits - *set. - * - * Returns how many values were actually decoded. - */ -size_t bitset_extract_setbits_uint16(const uint64_t *bitset, size_t length, - uint16_t *out, uint16_t base); - -/* - * Given two bitsets containing "length" 64-bit words, write out the position - * of all the common set bits to "out", values start at "base" - * (can be set to zero) - * - * The "out" pointer should be sufficient to store the actual number of bits - * set. - * - * Returns how many values were actually decoded. - */ -size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ bitset1, - const uint64_t * __restrict__ bitset2, - size_t length, uint16_t *out, - uint16_t base); - -/* - * Given a bitset having cardinality card, set all bit values in the list (there - * are length of them) - * and return the updated cardinality. This evidently assumes that the bitset - * already contained data. - */ -uint64_t bitset_set_list_withcard(void *bitset, uint64_t card, - const uint16_t *list, uint64_t length); -/* - * Given a bitset, set all bit values in the list (there - * are length of them). - */ -void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length); - -/* - * Given a bitset having cardinality card, unset all bit values in the list - * (there are length of them) - * and return the updated cardinality. This evidently assumes that the bitset - * already contained data. 
- */ -uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list, - uint64_t length); - -/* - * Given a bitset having cardinality card, toggle all bit values in the list - * (there are length of them) - * and return the updated cardinality. This evidently assumes that the bitset - * already contained data. - */ - -uint64_t bitset_flip_list_withcard(void *bitset, uint64_t card, - const uint16_t *list, uint64_t length); - -void bitset_flip_list(void *bitset, const uint16_t *list, uint64_t length); - -#ifdef USEAVX -/*** - * BEGIN Harley-Seal popcount functions. - */ - -/** - * Compute the population count of a 256-bit word - * This is not especially fast, but it is convenient as part of other functions. - */ -static inline __m256i popcount256(__m256i v) { - const __m256i lookuppos = _mm256_setr_epi8( - /* 0 */ 4 + 0, /* 1 */ 4 + 1, /* 2 */ 4 + 1, /* 3 */ 4 + 2, - /* 4 */ 4 + 1, /* 5 */ 4 + 2, /* 6 */ 4 + 2, /* 7 */ 4 + 3, - /* 8 */ 4 + 1, /* 9 */ 4 + 2, /* a */ 4 + 2, /* b */ 4 + 3, - /* c */ 4 + 2, /* d */ 4 + 3, /* e */ 4 + 3, /* f */ 4 + 4, - - /* 0 */ 4 + 0, /* 1 */ 4 + 1, /* 2 */ 4 + 1, /* 3 */ 4 + 2, - /* 4 */ 4 + 1, /* 5 */ 4 + 2, /* 6 */ 4 + 2, /* 7 */ 4 + 3, - /* 8 */ 4 + 1, /* 9 */ 4 + 2, /* a */ 4 + 2, /* b */ 4 + 3, - /* c */ 4 + 2, /* d */ 4 + 3, /* e */ 4 + 3, /* f */ 4 + 4); - const __m256i lookupneg = _mm256_setr_epi8( - /* 0 */ 4 - 0, /* 1 */ 4 - 1, /* 2 */ 4 - 1, /* 3 */ 4 - 2, - /* 4 */ 4 - 1, /* 5 */ 4 - 2, /* 6 */ 4 - 2, /* 7 */ 4 - 3, - /* 8 */ 4 - 1, /* 9 */ 4 - 2, /* a */ 4 - 2, /* b */ 4 - 3, - /* c */ 4 - 2, /* d */ 4 - 3, /* e */ 4 - 3, /* f */ 4 - 4, - - /* 0 */ 4 - 0, /* 1 */ 4 - 1, /* 2 */ 4 - 1, /* 3 */ 4 - 2, - /* 4 */ 4 - 1, /* 5 */ 4 - 2, /* 6 */ 4 - 2, /* 7 */ 4 - 3, - /* 8 */ 4 - 1, /* 9 */ 4 - 2, /* a */ 4 - 2, /* b */ 4 - 3, - /* c */ 4 - 2, /* d */ 4 - 3, /* e */ 4 - 3, /* f */ 4 - 4); - const __m256i low_mask = _mm256_set1_epi8(0x0f); - - const __m256i lo = _mm256_and_si256(v, low_mask); - const __m256i hi = 
_mm256_and_si256(_mm256_srli_epi16(v, 4), low_mask); - const __m256i popcnt1 = _mm256_shuffle_epi8(lookuppos, lo); - const __m256i popcnt2 = _mm256_shuffle_epi8(lookupneg, hi); - return _mm256_sad_epu8(popcnt1, popcnt2); -} - -/** - * Simple CSA over 256 bits - */ -static inline void CSA(__m256i *h, __m256i *l, __m256i a, __m256i b, - __m256i c) { - const __m256i u = _mm256_xor_si256(a, b); - *h = _mm256_or_si256(_mm256_and_si256(a, b), _mm256_and_si256(u, c)); - *l = _mm256_xor_si256(u, c); -} - -/** - * Fast Harley-Seal AVX population count function - */ -inline static uint64_t avx2_harley_seal_popcount256(const __m256i *data, - const uint64_t size) { - __m256i total = _mm256_setzero_si256(); - __m256i ones = _mm256_setzero_si256(); - __m256i twos = _mm256_setzero_si256(); - __m256i fours = _mm256_setzero_si256(); - __m256i eights = _mm256_setzero_si256(); - __m256i sixteens = _mm256_setzero_si256(); - __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; - - const uint64_t limit = size - size % 16; - uint64_t i = 0; - - for (; i < limit; i += 16) { - CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i), - _mm256_lddqu_si256(data + i + 1)); - CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 2), - _mm256_lddqu_si256(data + i + 3)); - CSA(&foursA, &twos, twos, twosA, twosB); - CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 4), - _mm256_lddqu_si256(data + i + 5)); - CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 6), - _mm256_lddqu_si256(data + i + 7)); - CSA(&foursB, &twos, twos, twosA, twosB); - CSA(&eightsA, &fours, fours, foursA, foursB); - CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 8), - _mm256_lddqu_si256(data + i + 9)); - CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 10), - _mm256_lddqu_si256(data + i + 11)); - CSA(&foursA, &twos, twos, twosA, twosB); - CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 12), - _mm256_lddqu_si256(data + i + 13)); - CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 14), - 
_mm256_lddqu_si256(data + i + 15)); - CSA(&foursB, &twos, twos, twosA, twosB); - CSA(&eightsB, &fours, fours, foursA, foursB); - CSA(&sixteens, &eights, eights, eightsA, eightsB); - - total = _mm256_add_epi64(total, popcount256(sixteens)); - } - - total = _mm256_slli_epi64(total, 4); // * 16 - total = _mm256_add_epi64( - total, _mm256_slli_epi64(popcount256(eights), 3)); // += 8 * ... - total = _mm256_add_epi64( - total, _mm256_slli_epi64(popcount256(fours), 2)); // += 4 * ... - total = _mm256_add_epi64( - total, _mm256_slli_epi64(popcount256(twos), 1)); // += 2 * ... - total = _mm256_add_epi64(total, popcount256(ones)); - for (; i < size; i++) - total = - _mm256_add_epi64(total, popcount256(_mm256_lddqu_si256(data + i))); - - return (uint64_t)(_mm256_extract_epi64(total, 0)) + - (uint64_t)(_mm256_extract_epi64(total, 1)) + - (uint64_t)(_mm256_extract_epi64(total, 2)) + - (uint64_t)(_mm256_extract_epi64(total, 3)); -} - -#define AVXPOPCNTFNC(opname, avx_intrinsic) \ - static inline uint64_t avx2_harley_seal_popcount256_##opname( \ - const __m256i *data1, const __m256i *data2, const uint64_t size) { \ - __m256i total = _mm256_setzero_si256(); \ - __m256i ones = _mm256_setzero_si256(); \ - __m256i twos = _mm256_setzero_si256(); \ - __m256i fours = _mm256_setzero_si256(); \ - __m256i eights = _mm256_setzero_si256(); \ - __m256i sixteens = _mm256_setzero_si256(); \ - __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; \ - __m256i A1, A2; \ - const uint64_t limit = size - size % 16; \ - uint64_t i = 0; \ - for (; i < limit; i += 16) { \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ - _mm256_lddqu_si256(data2 + i)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 1), \ - _mm256_lddqu_si256(data2 + i + 1)); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 2), \ - _mm256_lddqu_si256(data2 + i + 2)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 3), \ - _mm256_lddqu_si256(data2 + i + 3)); \ - CSA(&twosB, 
&ones, ones, A1, A2); \ - CSA(&foursA, &twos, twos, twosA, twosB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 4), \ - _mm256_lddqu_si256(data2 + i + 4)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 5), \ - _mm256_lddqu_si256(data2 + i + 5)); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 6), \ - _mm256_lddqu_si256(data2 + i + 6)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 7), \ - _mm256_lddqu_si256(data2 + i + 7)); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursB, &twos, twos, twosA, twosB); \ - CSA(&eightsA, &fours, fours, foursA, foursB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 8), \ - _mm256_lddqu_si256(data2 + i + 8)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 9), \ - _mm256_lddqu_si256(data2 + i + 9)); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 10), \ - _mm256_lddqu_si256(data2 + i + 10)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 11), \ - _mm256_lddqu_si256(data2 + i + 11)); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursA, &twos, twos, twosA, twosB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 12), \ - _mm256_lddqu_si256(data2 + i + 12)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 13), \ - _mm256_lddqu_si256(data2 + i + 13)); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 14), \ - _mm256_lddqu_si256(data2 + i + 14)); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 15), \ - _mm256_lddqu_si256(data2 + i + 15)); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursB, &twos, twos, twosA, twosB); \ - CSA(&eightsB, &fours, fours, foursA, foursB); \ - CSA(&sixteens, &eights, eights, eightsA, eightsB); \ - total = _mm256_add_epi64(total, popcount256(sixteens)); \ - } \ - total = _mm256_slli_epi64(total, 4); \ - total = _mm256_add_epi64(total, \ - _mm256_slli_epi64(popcount256(eights), 3)); \ - total = \ - 
_mm256_add_epi64(total, _mm256_slli_epi64(popcount256(fours), 2)); \ - total = \ - _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(twos), 1)); \ - total = _mm256_add_epi64(total, popcount256(ones)); \ - for (; i < size; i++) { \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ - _mm256_lddqu_si256(data2 + i)); \ - total = _mm256_add_epi64(total, popcount256(A1)); \ - } \ - return (uint64_t)(_mm256_extract_epi64(total, 0)) + \ - (uint64_t)(_mm256_extract_epi64(total, 1)) + \ - (uint64_t)(_mm256_extract_epi64(total, 2)) + \ - (uint64_t)(_mm256_extract_epi64(total, 3)); \ - } \ - static inline uint64_t avx2_harley_seal_popcount256andstore_##opname( \ - const __m256i *__restrict__ data1, const __m256i *__restrict__ data2, \ - __m256i *__restrict__ out, const uint64_t size) { \ - __m256i total = _mm256_setzero_si256(); \ - __m256i ones = _mm256_setzero_si256(); \ - __m256i twos = _mm256_setzero_si256(); \ - __m256i fours = _mm256_setzero_si256(); \ - __m256i eights = _mm256_setzero_si256(); \ - __m256i sixteens = _mm256_setzero_si256(); \ - __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; \ - __m256i A1, A2; \ - const uint64_t limit = size - size % 16; \ - uint64_t i = 0; \ - for (; i < limit; i += 16) { \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ - _mm256_lddqu_si256(data2 + i)); \ - _mm256_storeu_si256(out + i, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 1), \ - _mm256_lddqu_si256(data2 + i + 1)); \ - _mm256_storeu_si256(out + i + 1, A2); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 2), \ - _mm256_lddqu_si256(data2 + i + 2)); \ - _mm256_storeu_si256(out + i + 2, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 3), \ - _mm256_lddqu_si256(data2 + i + 3)); \ - _mm256_storeu_si256(out + i + 3, A2); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursA, &twos, twos, twosA, twosB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 4), \ - _mm256_lddqu_si256(data2 + i + 
4)); \ - _mm256_storeu_si256(out + i + 4, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 5), \ - _mm256_lddqu_si256(data2 + i + 5)); \ - _mm256_storeu_si256(out + i + 5, A2); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 6), \ - _mm256_lddqu_si256(data2 + i + 6)); \ - _mm256_storeu_si256(out + i + 6, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 7), \ - _mm256_lddqu_si256(data2 + i + 7)); \ - _mm256_storeu_si256(out + i + 7, A2); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursB, &twos, twos, twosA, twosB); \ - CSA(&eightsA, &fours, fours, foursA, foursB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 8), \ - _mm256_lddqu_si256(data2 + i + 8)); \ - _mm256_storeu_si256(out + i + 8, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 9), \ - _mm256_lddqu_si256(data2 + i + 9)); \ - _mm256_storeu_si256(out + i + 9, A2); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 10), \ - _mm256_lddqu_si256(data2 + i + 10)); \ - _mm256_storeu_si256(out + i + 10, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 11), \ - _mm256_lddqu_si256(data2 + i + 11)); \ - _mm256_storeu_si256(out + i + 11, A2); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursA, &twos, twos, twosA, twosB); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 12), \ - _mm256_lddqu_si256(data2 + i + 12)); \ - _mm256_storeu_si256(out + i + 12, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 13), \ - _mm256_lddqu_si256(data2 + i + 13)); \ - _mm256_storeu_si256(out + i + 13, A2); \ - CSA(&twosA, &ones, ones, A1, A2); \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 14), \ - _mm256_lddqu_si256(data2 + i + 14)); \ - _mm256_storeu_si256(out + i + 14, A1); \ - A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 15), \ - _mm256_lddqu_si256(data2 + i + 15)); \ - _mm256_storeu_si256(out + i + 15, A2); \ - CSA(&twosB, &ones, ones, A1, A2); \ - CSA(&foursB, &twos, 
twos, twosA, twosB); \ - CSA(&eightsB, &fours, fours, foursA, foursB); \ - CSA(&sixteens, &eights, eights, eightsA, eightsB); \ - total = _mm256_add_epi64(total, popcount256(sixteens)); \ - } \ - total = _mm256_slli_epi64(total, 4); \ - total = _mm256_add_epi64(total, \ - _mm256_slli_epi64(popcount256(eights), 3)); \ - total = \ - _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(fours), 2)); \ - total = \ - _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(twos), 1)); \ - total = _mm256_add_epi64(total, popcount256(ones)); \ - for (; i < size; i++) { \ - A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ - _mm256_lddqu_si256(data2 + i)); \ - _mm256_storeu_si256(out + i, A1); \ - total = _mm256_add_epi64(total, popcount256(A1)); \ - } \ - return (uint64_t)(_mm256_extract_epi64(total, 0)) + \ - (uint64_t)(_mm256_extract_epi64(total, 1)) + \ - (uint64_t)(_mm256_extract_epi64(total, 2)) + \ - (uint64_t)(_mm256_extract_epi64(total, 3)); \ - } - -AVXPOPCNTFNC(or, _mm256_or_si256) -AVXPOPCNTFNC(union, _mm256_or_si256) -AVXPOPCNTFNC(and, _mm256_and_si256) -AVXPOPCNTFNC(intersection, _mm256_and_si256) -AVXPOPCNTFNC (xor, _mm256_xor_si256) -AVXPOPCNTFNC(andnot, _mm256_andnot_si256) - -/*** - * END Harley-Seal popcount functions. 
- */ - -#endif // USEAVX - -#endif -/* end file include/roaring/bitset_util.h */ -/* begin file include/roaring/containers/array.h */ -/* - * array.h - * - */ - -#ifndef INCLUDE_CONTAINERS_ARRAY_H_ -#define INCLUDE_CONTAINERS_ARRAY_H_ - +#include +#include #include +#ifdef __cplusplus +extern "C" { namespace roaring { namespace api { +#endif -/* Containers with DEFAULT_MAX_SIZE or less integers should be arrays */ -enum { DEFAULT_MAX_SIZE = 4096 }; - -/* struct array_container - sparse representation of a bitmap - * - * @cardinality: number of indices in `array` (and the bitmap) - * @capacity: allocated size of `array` - * @array: sorted list of integers - */ -struct array_container_s { - int32_t cardinality; - int32_t capacity; - uint16_t *array; +struct bitset_s { + uint64_t *CBITSET_RESTRICT array; + /* For simplicity and performance, we prefer to have a size and a capacity that is a multiple of 64 bits. + * Thus we only track the size and the capacity in terms of 64-bit words allocated */ + size_t arraysize; + size_t capacity; }; -typedef struct array_container_s array_container_t; +typedef struct bitset_s bitset_t; -/* Create a new array with default. Return NULL in case of failure. See also - * array_container_create_given_capacity. */ -array_container_t *array_container_create(void); +/* Create a new bitset. Return NULL in case of failure. */ +bitset_t *bitset_create(void); -/* Create a new array with a specified capacity size. Return NULL in case of +/* Create a new bitset able to contain size bits. Return NULL in case of * failure. */ -array_container_t *array_container_create_given_capacity(int32_t size); - -/* Create a new array containing all values in [min,max). */ -array_container_t * array_container_create_range(uint32_t min, uint32_t max); - -/* - * Shrink the capacity to the actual size, return the number of bytes saved. - */ -int array_container_shrink_to_fit(array_container_t *src); - -/* Free memory owned by `array'. 
*/ -void array_container_free(array_container_t *array); +bitset_t *bitset_create_with_capacity(size_t size); -/* Duplicate container */ -array_container_t *array_container_clone(const array_container_t *src); - -int32_t array_container_serialize(const array_container_t *container, - char *buf) WARN_UNUSED; - -uint32_t array_container_serialization_len(const array_container_t *container); - -void *array_container_deserialize(const char *buf, size_t buf_len); - -/* Get the cardinality of `array'. */ -static inline int array_container_cardinality(const array_container_t *array) { - return array->cardinality; -} - -static inline bool array_container_nonzero_cardinality( - const array_container_t *array) { - return array->cardinality > 0; -} +/* Free memory. */ +void bitset_free(bitset_t *bitset); -/* Copy one container into another. We assume that they are distinct. */ -void array_container_copy(const array_container_t *src, array_container_t *dst); +/* Set all bits to zero. */ +void bitset_clear(bitset_t *bitset); -/* Add all the values in [min,max) (included) at a distance k*step from min. - The container must have a size less or equal to DEFAULT_MAX_SIZE after this - addition. */ -void array_container_add_from_range(array_container_t *arr, uint32_t min, - uint32_t max, uint16_t step); +/* Set all bits to one. */ +void bitset_fill(bitset_t *bitset); -/* Set the cardinality to zero (does not release memory). */ -static inline void array_container_clear(array_container_t *array) { - array->cardinality = 0; -} +/* Create a copy */ +bitset_t *bitset_copy(const bitset_t *bitset); -static inline bool array_container_empty(const array_container_t *array) { - return array->cardinality == 0; -} +/* For advanced users: Resize the bitset so that it can support newarraysize * 64 bits. + * Return true in case of success, false for failure. Pad + * with zeroes new buffer areas if requested. 
*/ +bool bitset_resize(bitset_t *bitset, size_t newarraysize, bool padwithzeroes); -/* check whether the cardinality is equal to the capacity (this does not mean -* that it contains 1<<16 elements) */ -static inline bool array_container_full(const array_container_t *array) { - return array->cardinality == array->capacity; +/* returns how many bytes of memory the backend buffer uses */ +inline size_t bitset_size_in_bytes(const bitset_t *bitset) { + return bitset->arraysize * sizeof(uint64_t); } - -/* Compute the union of `src_1' and `src_2' and write the result to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ -void array_container_union(const array_container_t *src_1, - const array_container_t *src_2, - array_container_t *dst); - -/* symmetric difference, see array_container_union */ -void array_container_xor(const array_container_t *array_1, - const array_container_t *array_2, - array_container_t *out); - -/* Computes the intersection of src_1 and src_2 and write the result to - * dst. It is assumed that dst is distinct from both src_1 and src_2. */ -void array_container_intersection(const array_container_t *src_1, - const array_container_t *src_2, - array_container_t *dst); - -/* Check whether src_1 and src_2 intersect. */ -bool array_container_intersect(const array_container_t *src_1, - const array_container_t *src_2); - - -/* computers the size of the intersection between two arrays. - */ -int array_container_intersection_cardinality(const array_container_t *src_1, - const array_container_t *src_2); - -/* computes the intersection of array1 and array2 and write the result to - * array1. - * */ -void array_container_intersection_inplace(array_container_t *src_1, - const array_container_t *src_2); - -/* - * Write out the 16-bit integers contained in this container as a list of 32-bit - * integers using base - * as the starting value (it might be expected that base has zeros in its 16 - * least significant bits). 
- * The function returns the number of values written. - * The caller is responsible for allocating enough memory in out. - */ -int array_container_to_uint32_array(void *vout, const array_container_t *cont, - uint32_t base); - -/* Compute the number of runs */ -int32_t array_container_number_of_runs(const array_container_t *a); - -/* - * Print this container using printf (useful for debugging). - */ -void array_container_printf(const array_container_t *v); - -/* - * Print this container using printf as a comma-separated list of 32-bit - * integers starting at base. - */ -void array_container_printf_as_uint32_array(const array_container_t *v, - uint32_t base); - -/** - * Return the serialized size in bytes of a container having cardinality "card". - */ -static inline int32_t array_container_serialized_size_in_bytes(int32_t card) { - return card * 2 + 2; +/* returns how many bits can be accessed */ +inline size_t bitset_size_in_bits(const bitset_t *bitset) { + return bitset->arraysize * 64; } -/** - * Increase capacity to at least min. - * Whether the existing data needs to be copied over depends on the "preserve" - * parameter. If preserve is false, then the new content will be uninitialized, - * otherwise the old content is copied. - */ -void array_container_grow(array_container_t *container, int32_t min, - bool preserve); - -bool array_container_iterate(const array_container_t *cont, uint32_t base, - roaring_iterator iterator, void *ptr); -bool array_container_iterate64(const array_container_t *cont, uint32_t base, - roaring_iterator64 iterator, uint64_t high_bits, - void *ptr); - -/** - * Writes the underlying array to buf, outputs how many bytes were written. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes written should be - * array_container_size_in_bytes(container). 
- * - */ -int32_t array_container_write(const array_container_t *container, char *buf); -/** - * Reads the instance from buf, outputs how many bytes were read. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes read should be array_container_size_in_bytes(container). - * You need to provide the (known) cardinality. - */ -int32_t array_container_read(int32_t cardinality, array_container_t *container, - const char *buf); - -/** - * Return the serialized size in bytes of a container (see - * bitset_container_write) - * This is meant to be compatible with the Java and Go versions of Roaring and - * assumes - * that the cardinality of the container is already known. - * - */ -static inline int32_t array_container_size_in_bytes( - const array_container_t *container) { - return container->cardinality * sizeof(uint16_t); +/* returns how many words (64-bit) of memory the backend buffer uses */ +inline size_t bitset_size_in_words(const bitset_t *bitset) { + return bitset->arraysize; } -/** - * Return true if the two arrays have the same content. - */ -static inline bool array_container_equals( - const array_container_t *container1, - const array_container_t *container2) { +/* For advanced users: Grow the bitset so that it can support newarraysize * 64 bits with padding. + * Return true in case of success, false for failure. */ +bool bitset_grow(bitset_t *bitset, size_t newarraysize); - if (container1->cardinality != container2->cardinality) { - return false; - } - return memequals(container1->array, container2->array, container1->cardinality*2); -} +/* attempts to recover unused memory, return false in case of roaring_reallocation + * failure */ +bool bitset_trim(bitset_t *bitset); -/** - * Return true if container1 is a subset of container2. 
- */ -bool array_container_is_subset(const array_container_t *container1, - const array_container_t *container2); +/* shifts all bits by 's' positions so that the bitset representing values + * 1,2,10 would represent values 1+s, 2+s, 10+s */ +void bitset_shift_left(bitset_t *bitset, size_t s); -/** - * If the element of given rank is in this container, supposing that the first - * element has rank start_rank, then the function returns true and sets element - * accordingly. - * Otherwise, it returns false and update start_rank. - */ -static inline bool array_container_select(const array_container_t *container, - uint32_t *start_rank, uint32_t rank, - uint32_t *element) { - int card = array_container_cardinality(container); - if (*start_rank + card <= rank) { - *start_rank += card; - return false; - } else { - *element = container->array[rank - *start_rank]; - return true; - } -} +/* shifts all bits by 's' positions so that the bitset representing values + * 1,2,10 would represent values 1-s, 2-s, 10-s, negative values are deleted */ +void bitset_shift_right(bitset_t *bitset, size_t s); -/* Computes the difference of array1 and array2 and write the result - * to array out. - * Array out does not need to be distinct from array_1 +/* Set the ith bit. Attempts to resize the bitset if needed (may silently fail) */ -void array_container_andnot(const array_container_t *array_1, - const array_container_t *array_2, - array_container_t *out); - -/* Append x to the set. Assumes that the value is larger than any preceding - * values. 
*/ -static inline void array_container_append(array_container_t *arr, - uint16_t pos) { - const int32_t capacity = arr->capacity; - - if (array_container_full(arr)) { - array_container_grow(arr, capacity + 1, true); +inline void bitset_set(bitset_t *bitset, size_t i) { + size_t shiftedi = i / 64; + if (shiftedi >= bitset->arraysize) { + if (!bitset_grow(bitset, shiftedi + 1)) { + return; + } } - - arr->array[arr->cardinality++] = pos; + bitset->array[shiftedi] |= ((uint64_t)1) << (i % 64); } -/** - * Add value to the set if final cardinality doesn't exceed max_cardinality. - * Return code: - * 1 -- value was added - * 0 -- value was already present - * -1 -- value was not added because cardinality would exceed max_cardinality - */ -static inline int array_container_try_add(array_container_t *arr, uint16_t value, - int32_t max_cardinality) { - const int32_t cardinality = arr->cardinality; - - // best case, we can append. - if ((array_container_empty(arr) || arr->array[cardinality - 1] < value) && - cardinality < max_cardinality) { - array_container_append(arr, value); - return 1; - } - - const int32_t loc = binarySearch(arr->array, cardinality, value); - - if (loc >= 0) { - return 0; - } else if (cardinality < max_cardinality) { - if (array_container_full(arr)) { - array_container_grow(arr, arr->capacity + 1, true); +/* Set the ith bit to the specified value. 
Attempts to resize the bitset if + * needed (may silently fail) */ +inline void bitset_set_to_value(bitset_t *bitset, size_t i, bool flag) { + size_t shiftedi = i / 64; + uint64_t mask = ((uint64_t)1) << (i % 64); + uint64_t dynmask = ((uint64_t)flag) << (i % 64); + if (shiftedi >= bitset->arraysize) { + if (!bitset_grow(bitset, shiftedi + 1)) { + return; } - const int32_t insert_idx = -loc - 1; - memmove(arr->array + insert_idx + 1, arr->array + insert_idx, - (cardinality - insert_idx) * sizeof(uint16_t)); - arr->array[insert_idx] = value; - arr->cardinality++; - return 1; - } else { - return -1; } + uint64_t w = bitset->array[shiftedi]; + w &= ~mask; + w |= dynmask; + bitset->array[shiftedi] = w; } -/* Add value to the set. Returns true if x was not already present. */ -static inline bool array_container_add(array_container_t *arr, uint16_t value) { - return array_container_try_add(arr, value, INT32_MAX) == 1; -} - -/* Remove x from the set. Returns true if x was present. */ -static inline bool array_container_remove(array_container_t *arr, - uint16_t pos) { - const int32_t idx = binarySearch(arr->array, arr->cardinality, pos); - const bool is_present = idx >= 0; - if (is_present) { - memmove(arr->array + idx, arr->array + idx + 1, - (arr->cardinality - idx - 1) * sizeof(uint16_t)); - arr->cardinality--; +/* Get the value of the ith bit. */ +inline bool bitset_get(const bitset_t *bitset, size_t i) { + size_t shiftedi = i / 64; + if (shiftedi >= bitset->arraysize) { + return false; } - - return is_present; + return (bitset->array[shiftedi] & (((uint64_t)1) << (i % 64))) != 0; } -/* Check whether x is present. 
*/ -inline bool array_container_contains(const array_container_t *arr, - uint16_t pos) { - // return binarySearch(arr->array, arr->cardinality, pos) >= 0; - // binary search with fallback to linear search for short ranges - int32_t low = 0; - const uint16_t * carr = (const uint16_t *) arr->array; - int32_t high = arr->cardinality - 1; - // while (high - low >= 0) { - while(high >= low + 16) { - int32_t middleIndex = (low + high)>>1; - uint16_t middleValue = carr[middleIndex]; - if (middleValue < pos) { - low = middleIndex + 1; - } else if (middleValue > pos) { - high = middleIndex - 1; - } else { - return true; - } - } - - for (int i=low; i <= high; i++) { - uint16_t v = carr[i]; - if (v == pos) { - return true; - } - if ( v > pos ) return false; - } - return false; +/* Count number of bits set. */ +size_t bitset_count(const bitset_t *bitset); -} +/* Find the index of the first bit set. Or zero if the bitset is empty. */ +size_t bitset_minimum(const bitset_t *bitset); -//* Check whether a range of values from range_start (included) to range_end (excluded) is present. */ -static inline bool array_container_contains_range(const array_container_t *arr, - uint32_t range_start, uint32_t range_end) { +/* Find the index of the last bit set. Or zero if the bitset is empty. 
*/ +size_t bitset_maximum(const bitset_t *bitset); - const uint16_t rs_included = range_start; - const uint16_t re_included = range_end - 1; +/* compute the union in-place (to b1), returns true if successful, to generate a + * new bitset first call bitset_copy */ +bool bitset_inplace_union(bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); - const uint16_t *carr = (const uint16_t *) arr->array; +/* report the size of the union (without materializing it) */ +size_t bitset_union_count(const bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); - const int32_t start = advanceUntil(carr, -1, arr->cardinality, rs_included); - const int32_t end = advanceUntil(carr, start - 1, arr->cardinality, re_included); - - return (start < arr->cardinality) && (end < arr->cardinality) - && (((uint16_t)(end - start)) == re_included - rs_included) - && (carr[start] == rs_included) && (carr[end] == re_included); -} - -/* Returns the smallest value (assumes not empty) */ -inline uint16_t array_container_minimum(const array_container_t *arr) { - if (arr->cardinality == 0) return 0; - return arr->array[0]; -} - -/* Returns the largest value (assumes not empty) */ -inline uint16_t array_container_maximum(const array_container_t *arr) { - if (arr->cardinality == 0) return 0; - return arr->array[arr->cardinality - 1]; -} - -/* Returns the number of values equal or smaller than x */ -inline int array_container_rank(const array_container_t *arr, uint16_t x) { - const int32_t idx = binarySearch(arr->array, arr->cardinality, x); - const bool is_present = idx >= 0; - if (is_present) { - return idx + 1; - } else { - return -idx - 1; - } -} - -/* Returns the index of the first value equal or smaller than x, or -1 */ -inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x) { - const int32_t idx = binarySearch(arr->array, arr->cardinality, x); - const bool is_present = idx >= 0; - if (is_present) { - return idx; - } else { - int32_t 
candidate = - idx - 1; - if(candidate < arr->cardinality) return candidate; - return -1; - } -} - -/* - * Adds all values in range [min,max] using hint: - * nvals_less is the number of array values less than $min - * nvals_greater is the number of array values greater than $max - */ -static inline void array_container_add_range_nvals(array_container_t *array, - uint32_t min, uint32_t max, - int32_t nvals_less, - int32_t nvals_greater) { - int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater; - if (union_cardinality > array->capacity) { - array_container_grow(array, union_cardinality, true); - } - memmove(&(array->array[union_cardinality - nvals_greater]), - &(array->array[array->cardinality - nvals_greater]), - nvals_greater * sizeof(uint16_t)); - for (uint32_t i = 0; i <= max - min; i++) { - array->array[nvals_less + i] = min + i; - } - array->cardinality = union_cardinality; -} - -/** - * Adds all values in range [min,max]. - */ -static inline void array_container_add_range(array_container_t *array, - uint32_t min, uint32_t max) { - int32_t nvals_greater = count_greater(array->array, array->cardinality, max); - int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min); - array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater); -} - -/* - * Removes all elements array[pos] .. 
array[pos+count-1] - */ -static inline void array_container_remove_range(array_container_t *array, - uint32_t pos, uint32_t count) { - if (count != 0) { - memmove(&(array->array[pos]), &(array->array[pos+count]), - (array->cardinality - pos - count) * sizeof(uint16_t)); - array->cardinality -= count; - } -} - -#endif /* INCLUDE_CONTAINERS_ARRAY_H_ */ -/* end file include/roaring/containers/array.h */ -/* begin file include/roaring/containers/bitset.h */ -/* - * bitset.h - * - */ - -#ifndef INCLUDE_CONTAINERS_BITSET_H_ -#define INCLUDE_CONTAINERS_BITSET_H_ - -#include -#include - -#ifdef USEAVX -#define ALIGN_AVX __attribute__((aligned(sizeof(__m256i)))) -#else -#define ALIGN_AVX -#endif - -enum { - BITSET_CONTAINER_SIZE_IN_WORDS = (1 << 16) / 64, - BITSET_UNKNOWN_CARDINALITY = -1 -}; - -struct bitset_container_s { - int32_t cardinality; - uint64_t *array; -}; - -typedef struct bitset_container_s bitset_container_t; - -/* Create a new bitset. Return NULL in case of failure. */ -bitset_container_t *bitset_container_create(void); - -/* Free memory. */ -void bitset_container_free(bitset_container_t *bitset); - -/* Clear bitset (sets bits to 0). */ -void bitset_container_clear(bitset_container_t *bitset); - -/* Set all bits to 1. */ -void bitset_container_set_all(bitset_container_t *bitset); - -/* Duplicate bitset */ -bitset_container_t *bitset_container_clone(const bitset_container_t *src); - -int32_t bitset_container_serialize(const bitset_container_t *container, - char *buf) WARN_UNUSED; - -uint32_t bitset_container_serialization_len(void); - -void *bitset_container_deserialize(const char *buf, size_t buf_len); - -/* Set the bit in [begin,end). WARNING: as of April 2016, this method is slow - * and - * should not be used in performance-sensitive code. Ever. */ -void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin, - uint32_t end); - -#ifdef ASMBITMANIPOPTIMIZATION -/* Set the ith bit. 
*/ -static inline void bitset_container_set(bitset_container_t *bitset, - uint16_t pos) { - uint64_t shift = 6; - uint64_t offset; - uint64_t p = pos; - ASM_SHIFT_RIGHT(p, shift, offset); - uint64_t load = bitset->array[offset]; - ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality); - bitset->array[offset] = load; -} - -/* Unset the ith bit. */ -static inline void bitset_container_unset(bitset_container_t *bitset, - uint16_t pos) { - uint64_t shift = 6; - uint64_t offset; - uint64_t p = pos; - ASM_SHIFT_RIGHT(p, shift, offset); - uint64_t load = bitset->array[offset]; - ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality); - bitset->array[offset] = load; -} - -/* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower - * than bitset_container_set. */ -static inline bool bitset_container_add(bitset_container_t *bitset, - uint16_t pos) { - uint64_t shift = 6; - uint64_t offset; - uint64_t p = pos; - ASM_SHIFT_RIGHT(p, shift, offset); - uint64_t load = bitset->array[offset]; - // could be possibly slightly further optimized - const int32_t oldcard = bitset->cardinality; - ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality); - bitset->array[offset] = load; - return bitset->cardinality - oldcard; -} - -/* Remove `pos' from `bitset'. Returns true if `pos' was present. Might be - * slower than bitset_container_unset. */ -static inline bool bitset_container_remove(bitset_container_t *bitset, - uint16_t pos) { - uint64_t shift = 6; - uint64_t offset; - uint64_t p = pos; - ASM_SHIFT_RIGHT(p, shift, offset); - uint64_t load = bitset->array[offset]; - // could be possibly slightly further optimized - const int32_t oldcard = bitset->cardinality; - ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality); - bitset->array[offset] = load; - return oldcard - bitset->cardinality; -} - -/* Get the value of the ith bit. 
*/ -inline bool bitset_container_get(const bitset_container_t *bitset, - uint16_t pos) { - uint64_t word = bitset->array[pos >> 6]; - const uint64_t p = pos; - ASM_INPLACESHIFT_RIGHT(word, p); - return word & 1; -} - -#else - -/* Set the ith bit. */ -static inline void bitset_container_set(bitset_container_t *bitset, - uint16_t pos) { - const uint64_t old_word = bitset->array[pos >> 6]; - const int index = pos & 63; - const uint64_t new_word = old_word | (UINT64_C(1) << index); - bitset->cardinality += (uint32_t)((old_word ^ new_word) >> index); - bitset->array[pos >> 6] = new_word; -} - -/* Unset the ith bit. */ -static inline void bitset_container_unset(bitset_container_t *bitset, - uint16_t pos) { - const uint64_t old_word = bitset->array[pos >> 6]; - const int index = pos & 63; - const uint64_t new_word = old_word & (~(UINT64_C(1) << index)); - bitset->cardinality -= (uint32_t)((old_word ^ new_word) >> index); - bitset->array[pos >> 6] = new_word; -} - -/* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower - * than bitset_container_set. */ -static inline bool bitset_container_add(bitset_container_t *bitset, - uint16_t pos) { - const uint64_t old_word = bitset->array[pos >> 6]; - const int index = pos & 63; - const uint64_t new_word = old_word | (UINT64_C(1) << index); - const uint64_t increment = (old_word ^ new_word) >> index; - bitset->cardinality += (uint32_t)increment; - bitset->array[pos >> 6] = new_word; - return increment > 0; -} - -/* Remove `pos' from `bitset'. Returns true if `pos' was present. Might be - * slower than bitset_container_unset. 
*/ -static inline bool bitset_container_remove(bitset_container_t *bitset, - uint16_t pos) { - const uint64_t old_word = bitset->array[pos >> 6]; - const int index = pos & 63; - const uint64_t new_word = old_word & (~(UINT64_C(1) << index)); - const uint64_t increment = (old_word ^ new_word) >> index; - bitset->cardinality -= (uint32_t)increment; - bitset->array[pos >> 6] = new_word; - return increment > 0; -} - -/* Get the value of the ith bit. */ -inline bool bitset_container_get(const bitset_container_t *bitset, - uint16_t pos) { - const uint64_t word = bitset->array[pos >> 6]; - return (word >> (pos & 63)) & 1; -} - -#endif - -/* -* Check if all bits are set in a range of positions from pos_start (included) to -* pos_end (excluded). -*/ -static inline bool bitset_container_get_range(const bitset_container_t *bitset, - uint32_t pos_start, uint32_t pos_end) { - - const uint32_t start = pos_start >> 6; - const uint32_t end = pos_end >> 6; - - const uint64_t first = ~((1ULL << (pos_start & 0x3F)) - 1); - const uint64_t last = (1ULL << (pos_end & 0x3F)) - 1; - - if (start == end) return ((bitset->array[end] & first & last) == (first & last)); - if ((bitset->array[start] & first) != first) return false; - - if ((end < BITSET_CONTAINER_SIZE_IN_WORDS) && ((bitset->array[end] & last) != last)){ - - return false; - } - - for (uint16_t i = start + 1; (i < BITSET_CONTAINER_SIZE_IN_WORDS) && (i < end); ++i){ - - if (bitset->array[i] != UINT64_C(0xFFFFFFFFFFFFFFFF)) return false; - } - - return true; -} - -/* Check whether `bitset' is present in `array'. Calls bitset_container_get. */ -inline bool bitset_container_contains(const bitset_container_t *bitset, - uint16_t pos) { - return bitset_container_get(bitset, pos); -} - -/* -* Check whether a range of bits from position `pos_start' (included) to `pos_end' (excluded) -* is present in `bitset'. Calls bitset_container_get_all. 
-*/ -static inline bool bitset_container_contains_range(const bitset_container_t *bitset, - uint32_t pos_start, uint32_t pos_end) { - return bitset_container_get_range(bitset, pos_start, pos_end); -} - -/* Get the number of bits set */ -static inline int bitset_container_cardinality( - const bitset_container_t *bitset) { - return bitset->cardinality; -} - - - - -/* Copy one container into another. We assume that they are distinct. */ -void bitset_container_copy(const bitset_container_t *source, - bitset_container_t *dest); - -/* Add all the values [min,max) at a distance k*step from min: min, - * min+step,.... */ -void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min, - uint32_t max, uint16_t step); - -/* Get the number of bits set (force computation). This does not modify bitset. - * To update the cardinality, you should do - * bitset->cardinality = bitset_container_compute_cardinality(bitset).*/ -int bitset_container_compute_cardinality(const bitset_container_t *bitset); - -/* Get whether there is at least one bit set (see bitset_container_empty for the reverse), - when the cardinality is unknown, it is computed and stored in the struct */ -static inline bool bitset_container_nonzero_cardinality( - bitset_container_t *bitset) { - // account for laziness - if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) { - // could bail early instead with a nonzero result - bitset->cardinality = bitset_container_compute_cardinality(bitset); - } - return bitset->cardinality > 0; -} - -/* Check whether this bitset is empty (see bitset_container_nonzero_cardinality for the reverse), - * it never modifies the bitset struct. 
*/ -static inline bool bitset_container_empty( - const bitset_container_t *bitset) { - if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) { - for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) { - if((bitset->array[i]) != 0) return false; - } - return true; - } - return bitset->cardinality == 0; -} - - -/* Get whether there is at least one bit set (see bitset_container_empty for the reverse), - the bitset is never modified */ -static inline bool bitset_container_const_nonzero_cardinality( - const bitset_container_t *bitset) { - return !bitset_container_empty(bitset); -} - -/* - * Check whether the two bitsets intersect - */ -bool bitset_container_intersect(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the - * cardinality. */ -int bitset_container_or(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the union of bitsets `src_1' and `src_2' and return the cardinality. - */ -int bitset_container_or_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the - * cardinality. Same as bitset_container_or. */ -int bitset_container_union(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the union of bitsets `src_1' and `src_2' and return the - * cardinality. Same as bitset_container_or_justcard. */ -int bitset_container_union_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the union of bitsets `src_1' and `src_2' into `dst', but does not - * update the cardinality. Provided to optimize chained operations. 
*/ -int bitset_container_or_nocard(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and - * return the cardinality. */ -int bitset_container_and(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the intersection of bitsets `src_1' and `src_2' and return the - * cardinality. */ -int bitset_container_and_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and - * return the cardinality. Same as bitset_container_and. */ -int bitset_container_intersection(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the intersection of bitsets `src_1' and `src_2' and return the - * cardinality. Same as bitset_container_and_justcard. */ -int bitset_container_intersection_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does - * not update the cardinality. Provided to optimize chained operations. */ -int bitset_container_and_nocard(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst' and - * return the cardinality. */ -int bitset_container_xor(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the exclusive or of bitsets `src_1' and `src_2' and return the - * cardinality. */ -int bitset_container_xor_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst', but does - * not update the cardinality. Provided to optimize chained operations. 
*/ -int bitset_container_xor_nocard(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the and not of bitsets `src_1' and `src_2' into `dst' and return the - * cardinality. */ -int bitset_container_andnot(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Computes the and not of bitsets `src_1' and `src_2' and return the - * cardinality. */ -int bitset_container_andnot_justcard(const bitset_container_t *src_1, - const bitset_container_t *src_2); - -/* Computes the and not or of bitsets `src_1' and `src_2' into `dst', but does - * not update the cardinality. Provided to optimize chained operations. */ -int bitset_container_andnot_nocard(const bitset_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* - * Write out the 16-bit integers contained in this container as a list of 32-bit - * integers using base - * as the starting value (it might be expected that base has zeros in its 16 - * least significant bits). - * The function returns the number of values written. - * The caller is responsible for allocating enough memory in out. - * The out pointer should point to enough memory (the cardinality times 32 - * bits). - */ -int bitset_container_to_uint32_array(void *out, const bitset_container_t *cont, - uint32_t base); - -/* - * Print this container using printf (useful for debugging). - */ -void bitset_container_printf(const bitset_container_t *v); - -/* - * Print this container using printf as a comma-separated list of 32-bit - * integers starting at base. - */ -void bitset_container_printf_as_uint32_array(const bitset_container_t *v, - uint32_t base); - -/** - * Return the serialized size in bytes of a container. - */ -static inline int32_t bitset_container_serialized_size_in_bytes(void) { - return BITSET_CONTAINER_SIZE_IN_WORDS * 8; -} - -/** - * Return the the number of runs. 
- */ -int bitset_container_number_of_runs(bitset_container_t *b); - -bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, - roaring_iterator iterator, void *ptr); -bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, - roaring_iterator64 iterator, uint64_t high_bits, - void *ptr); - -/** - * Writes the underlying array to buf, outputs how many bytes were written. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes written should be - * bitset_container_size_in_bytes(container). - */ -int32_t bitset_container_write(const bitset_container_t *container, char *buf); - -/** - * Reads the instance from buf, outputs how many bytes were read. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes read should be bitset_container_size_in_bytes(container). - * You need to provide the (known) cardinality. - */ -int32_t bitset_container_read(int32_t cardinality, - bitset_container_t *container, const char *buf); -/** - * Return the serialized size in bytes of a container (see - * bitset_container_write). - * This is meant to be compatible with the Java and Go versions of Roaring and - * assumes - * that the cardinality of the container is already known or can be computed. - */ -static inline int32_t bitset_container_size_in_bytes( - const bitset_container_t *container) { - (void)container; - return BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); -} - -/** - * Return true if the two containers have the same content. - */ -bool bitset_container_equals(const bitset_container_t *container1, - const bitset_container_t *container2); - -/** -* Return true if container1 is a subset of container2. 
-*/ -bool bitset_container_is_subset(const bitset_container_t *container1, - const bitset_container_t *container2); - -/** - * If the element of given rank is in this container, supposing that the first - * element has rank start_rank, then the function returns true and sets element - * accordingly. - * Otherwise, it returns false and update start_rank. - */ -bool bitset_container_select(const bitset_container_t *container, - uint32_t *start_rank, uint32_t rank, - uint32_t *element); - -/* Returns the smallest value (assumes not empty) */ -uint16_t bitset_container_minimum(const bitset_container_t *container); - -/* Returns the largest value (assumes not empty) */ -uint16_t bitset_container_maximum(const bitset_container_t *container); - -/* Returns the number of values equal or smaller than x */ -int bitset_container_rank(const bitset_container_t *container, uint16_t x); - -/* Returns the index of the first value equal or larger than x, or -1 */ -int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x); -#endif /* INCLUDE_CONTAINERS_BITSET_H_ */ -/* end file include/roaring/containers/bitset.h */ -/* begin file include/roaring/containers/run.h */ -/* - * run.h - * - */ - -#ifndef INCLUDE_CONTAINERS_RUN_H_ -#define INCLUDE_CONTAINERS_RUN_H_ - -#include -#include -#include -#include - - -/* struct rle16_s - run length pair - * - * @value: start position of the run - * @length: length of the run is `length + 1` - * - * An RLE pair {v, l} would represent the integers between the interval - * [v, v+l+1], e.g. {3, 2} = [3, 4, 5]. - */ -struct rle16_s { - uint16_t value; - uint16_t length; -}; - -typedef struct rle16_s rle16_t; - -/* struct run_container_s - run container bitmap - * - * @n_runs: number of rle_t pairs in `runs`. - * @capacity: capacity in rle_t pairs `runs` can hold. - * @runs: pairs of rle_t. 
- * - */ -struct run_container_s { - int32_t n_runs; - int32_t capacity; - rle16_t *runs; -}; - -typedef struct run_container_s run_container_t; - -/* Create a new run container. Return NULL in case of failure. */ -run_container_t *run_container_create(void); - -/* Create a new run container with given capacity. Return NULL in case of - * failure. */ -run_container_t *run_container_create_given_capacity(int32_t size); - -/* - * Shrink the capacity to the actual size, return the number of bytes saved. - */ -int run_container_shrink_to_fit(run_container_t *src); - -/* Free memory owned by `run'. */ -void run_container_free(run_container_t *run); - -/* Duplicate container */ -run_container_t *run_container_clone(const run_container_t *src); - -int32_t run_container_serialize(const run_container_t *container, - char *buf) WARN_UNUSED; - -uint32_t run_container_serialization_len(const run_container_t *container); - -void *run_container_deserialize(const char *buf, size_t buf_len); - -/* - * Effectively deletes the value at index index, repacking data. 
- */ -static inline void recoverRoomAtIndex(run_container_t *run, uint16_t index) { - memmove(run->runs + index, run->runs + (1 + index), - (run->n_runs - index - 1) * sizeof(rle16_t)); - run->n_runs--; -} - -/** - * Good old binary search through rle data - */ -inline int32_t interleavedBinarySearch(const rle16_t *array, int32_t lenarray, - uint16_t ikey) { - int32_t low = 0; - int32_t high = lenarray - 1; - while (low <= high) { - int32_t middleIndex = (low + high) >> 1; - uint16_t middleValue = array[middleIndex].value; - if (middleValue < ikey) { - low = middleIndex + 1; - } else if (middleValue > ikey) { - high = middleIndex - 1; - } else { - return middleIndex; - } - } - return -(low + 1); -} - -/* - * Returns index of the run which contains $ikey - */ -static inline int32_t rle16_find_run(const rle16_t *array, int32_t lenarray, - uint16_t ikey) { - int32_t low = 0; - int32_t high = lenarray - 1; - while (low <= high) { - int32_t middleIndex = (low + high) >> 1; - uint16_t min = array[middleIndex].value; - uint16_t max = array[middleIndex].value + array[middleIndex].length; - if (ikey > max) { - low = middleIndex + 1; - } else if (ikey < min) { - high = middleIndex - 1; - } else { - return middleIndex; - } - } - return -(low + 1); -} - - -/** - * Returns number of runs which can'be be merged with the key because they - * are less than the key. - * Note that [5,6,7,8] can be merged with the key 9 and won't be counted. 
- */ -static inline int32_t rle16_count_less(const rle16_t* array, int32_t lenarray, - uint16_t key) { - if (lenarray == 0) return 0; - int32_t low = 0; - int32_t high = lenarray - 1; - while (low <= high) { - int32_t middleIndex = (low + high) >> 1; - uint16_t min_value = array[middleIndex].value; - uint16_t max_value = array[middleIndex].value + array[middleIndex].length; - if (max_value + UINT32_C(1) < key) { // uint32 arithmetic - low = middleIndex + 1; - } else if (key < min_value) { - high = middleIndex - 1; - } else { - return middleIndex; - } - } - return low; -} - -static inline int32_t rle16_count_greater(const rle16_t* array, int32_t lenarray, - uint16_t key) { - if (lenarray == 0) return 0; - int32_t low = 0; - int32_t high = lenarray - 1; - while (low <= high) { - int32_t middleIndex = (low + high) >> 1; - uint16_t min_value = array[middleIndex].value; - uint16_t max_value = array[middleIndex].value + array[middleIndex].length; - if (max_value < key) { - low = middleIndex + 1; - } else if (key + UINT32_C(1) < min_value) { // uint32 arithmetic - high = middleIndex - 1; - } else { - return lenarray - (middleIndex + 1); - } - } - return lenarray - low; -} - -/** - * increase capacity to at least min. Whether the - * existing data needs to be copied over depends on copy. If "copy" is false, - * then the new content will be uninitialized, otherwise a copy is made. - */ -void run_container_grow(run_container_t *run, int32_t min, bool copy); - -/** - * Moves the data so that we can write data at index - */ -static inline void makeRoomAtIndex(run_container_t *run, uint16_t index) { - /* This function calls realloc + memmove sequentially to move by one index. - * Potentially copying twice the array. - */ - if (run->n_runs + 1 > run->capacity) - run_container_grow(run, run->n_runs + 1, true); - memmove(run->runs + 1 + index, run->runs + index, - (run->n_runs - index) * sizeof(rle16_t)); - run->n_runs++; -} - -/* Add `pos' to `run'. 
Returns true if `pos' was not present. */ -bool run_container_add(run_container_t *run, uint16_t pos); - -/* Remove `pos' from `run'. Returns true if `pos' was present. */ -static inline bool run_container_remove(run_container_t *run, uint16_t pos) { - int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); - if (index >= 0) { - int32_t le = run->runs[index].length; - if (le == 0) { - recoverRoomAtIndex(run, (uint16_t)index); - } else { - run->runs[index].value++; - run->runs[index].length--; - } - return true; - } - index = -index - 2; // points to preceding value, possibly -1 - if (index >= 0) { // possible match - int32_t offset = pos - run->runs[index].value; - int32_t le = run->runs[index].length; - if (offset < le) { - // need to break in two - run->runs[index].length = (uint16_t)(offset - 1); - // need to insert - uint16_t newvalue = pos + 1; - int32_t newlength = le - offset - 1; - makeRoomAtIndex(run, (uint16_t)(index + 1)); - run->runs[index + 1].value = newvalue; - run->runs[index + 1].length = (uint16_t)newlength; - return true; - - } else if (offset == le) { - run->runs[index].length--; - return true; - } - } - // no match - return false; -} - -/* Check whether `pos' is present in `run'. */ -inline bool run_container_contains(const run_container_t *run, uint16_t pos) { - int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); - if (index >= 0) return true; - index = -index - 2; // points to preceding value, possibly -1 - if (index != -1) { // possible match - int32_t offset = pos - run->runs[index].value; - int32_t le = run->runs[index].length; - if (offset <= le) return true; - } - return false; -} - -/* -* Check whether all positions in a range of positions from pos_start (included) -* to pos_end (excluded) is present in `run'. 
-*/ -static inline bool run_container_contains_range(const run_container_t *run, - uint32_t pos_start, uint32_t pos_end) { - uint32_t count = 0; - int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos_start); - if (index < 0) { - index = -index - 2; - if ((index == -1) || ((pos_start - run->runs[index].value) > run->runs[index].length)){ - return false; - } - } - for (int32_t i = index; i < run->n_runs; ++i) { - const uint32_t stop = run->runs[i].value + run->runs[i].length; - if (run->runs[i].value >= pos_end) break; - if (stop >= pos_end) { - count += (((pos_end - run->runs[i].value) > 0) ? (pos_end - run->runs[i].value) : 0); - break; - } - const uint32_t min = (stop - pos_start) > 0 ? (stop - pos_start) : 0; - count += (min < run->runs[i].length) ? min : run->runs[i].length; - } - return count >= (pos_end - pos_start - 1); -} - -#ifdef USEAVX - -/* Get the cardinality of `run'. Requires an actual computation. */ -static inline int run_container_cardinality(const run_container_t *run) { - const int32_t n_runs = run->n_runs; - const rle16_t *runs = run->runs; - - /* by initializing with n_runs, we omit counting the +1 for each pair. */ - int sum = n_runs; - int32_t k = 0; - const int32_t step = sizeof(__m256i) / sizeof(rle16_t); - if (n_runs > step) { - __m256i total = _mm256_setzero_si256(); - for (; k + step <= n_runs; k += step) { - __m256i ymm1 = _mm256_lddqu_si256((const __m256i *)(runs + k)); - __m256i justlengths = _mm256_srli_epi32(ymm1, 16); - total = _mm256_add_epi32(total, justlengths); - } - // a store might be faster than extract? - uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)]; - _mm256_storeu_si256((__m256i *)buffer, total); - sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) + - (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]); - } - for (; k < n_runs; ++k) { - sum += runs[k].length; - } - - return sum; -} - -#else - -/* Get the cardinality of `run'. Requires an actual computation. 
*/ -static inline int run_container_cardinality(const run_container_t *run) { - const int32_t n_runs = run->n_runs; - const rle16_t *runs = run->runs; - - /* by initializing with n_runs, we omit counting the +1 for each pair. */ - int sum = n_runs; - for (int k = 0; k < n_runs; ++k) { - sum += runs[k].length; - } - - return sum; -} -#endif - -/* Card > 0?, see run_container_empty for the reverse */ -static inline bool run_container_nonzero_cardinality( - const run_container_t *run) { - return run->n_runs > 0; // runs never empty -} - -/* Card == 0?, see run_container_nonzero_cardinality for the reverse */ -static inline bool run_container_empty( - const run_container_t *run) { - return run->n_runs == 0; // runs never empty -} - - - -/* Copy one container into another. We assume that they are distinct. */ -void run_container_copy(const run_container_t *src, run_container_t *dst); - -/* Set the cardinality to zero (does not release memory). */ -static inline void run_container_clear(run_container_t *run) { - run->n_runs = 0; -} - -/** - * Append run described by vl to the run container, possibly merging. - * It is assumed that the run would be inserted at the end of the container, no - * check is made. - * It is assumed that the run container has the necessary capacity: caller is - * responsible for checking memory capacity. - * - * - * This is not a safe function, it is meant for performance: use with care. 
- */ -static inline void run_container_append(run_container_t *run, rle16_t vl, - rle16_t *previousrl) { - const uint32_t previousend = previousrl->value + previousrl->length; - if (vl.value > previousend + 1) { // we add a new one - run->runs[run->n_runs] = vl; - run->n_runs++; - *previousrl = vl; - } else { - uint32_t newend = vl.value + vl.length + UINT32_C(1); - if (newend > previousend) { // we merge - previousrl->length = (uint16_t)(newend - 1 - previousrl->value); - run->runs[run->n_runs - 1] = *previousrl; - } - } -} - -/** - * Like run_container_append but it is assumed that the content of run is empty. - */ -static inline rle16_t run_container_append_first(run_container_t *run, - rle16_t vl) { - run->runs[run->n_runs] = vl; - run->n_runs++; - return vl; -} - -/** - * append a single value given by val to the run container, possibly merging. - * It is assumed that the value would be inserted at the end of the container, - * no check is made. - * It is assumed that the run container has the necessary capacity: caller is - * responsible for checking memory capacity. - * - * This is not a safe function, it is meant for performance: use with care. - */ -static inline void run_container_append_value(run_container_t *run, - uint16_t val, - rle16_t *previousrl) { - const uint32_t previousend = previousrl->value + previousrl->length; - if (val > previousend + 1) { // we add a new one - //*previousrl = (rle16_t){.value = val, .length = 0};// requires C99 - previousrl->value = val; - previousrl->length = 0; - - run->runs[run->n_runs] = *previousrl; - run->n_runs++; - } else if (val == previousend + 1) { // we merge - previousrl->length++; - run->runs[run->n_runs - 1] = *previousrl; - } -} - -/** - * Like run_container_append_value but it is assumed that the content of run is - * empty. 
- */ -static inline rle16_t run_container_append_value_first(run_container_t *run, - uint16_t val) { - // rle16_t newrle = (rle16_t){.value = val, .length = 0};// requires C99 - rle16_t newrle; - newrle.value = val; - newrle.length = 0; - - run->runs[run->n_runs] = newrle; - run->n_runs++; - return newrle; -} - -/* Check whether the container spans the whole chunk (cardinality = 1<<16). - * This check can be done in constant time (inexpensive). */ -static inline bool run_container_is_full(const run_container_t *run) { - rle16_t vl = run->runs[0]; - return (run->n_runs == 1) && (vl.value == 0) && (vl.length == 0xFFFF); -} - -/* Compute the union of `src_1' and `src_2' and write the result to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ -void run_container_union(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst); - -/* Compute the union of `src_1' and `src_2' and write the result to `src_1' */ -void run_container_union_inplace(run_container_t *src_1, - const run_container_t *src_2); - -/* Compute the intersection of src_1 and src_2 and write the result to - * dst. It is assumed that dst is distinct from both src_1 and src_2. */ -void run_container_intersection(const run_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst); - -/* Compute the size of the intersection of src_1 and src_2 . */ -int run_container_intersection_cardinality(const run_container_t *src_1, - const run_container_t *src_2); - -/* Check whether src_1 and src_2 intersect. */ -bool run_container_intersect(const run_container_t *src_1, - const run_container_t *src_2); - -/* Compute the symmetric difference of `src_1' and `src_2' and write the result - * to `dst' - * It is assumed that `dst' is distinct from both `src_1' and `src_2'. 
*/ -void run_container_xor(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst); - -/* - * Write out the 16-bit integers contained in this container as a list of 32-bit - * integers using base - * as the starting value (it might be expected that base has zeros in its 16 - * least significant bits). - * The function returns the number of values written. - * The caller is responsible for allocating enough memory in out. - */ -int run_container_to_uint32_array(void *vout, const run_container_t *cont, - uint32_t base); - -/* - * Print this container using printf (useful for debugging). - */ -void run_container_printf(const run_container_t *v); - -/* - * Print this container using printf as a comma-separated list of 32-bit - * integers starting at base. - */ -void run_container_printf_as_uint32_array(const run_container_t *v, - uint32_t base); - -/** - * Return the serialized size in bytes of a container having "num_runs" runs. - */ -static inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs) { - return sizeof(uint16_t) + - sizeof(rle16_t) * num_runs; // each run requires 2 2-byte entries. -} - -bool run_container_iterate(const run_container_t *cont, uint32_t base, - roaring_iterator iterator, void *ptr); -bool run_container_iterate64(const run_container_t *cont, uint32_t base, - roaring_iterator64 iterator, uint64_t high_bits, - void *ptr); - -/** - * Writes the underlying array to buf, outputs how many bytes were written. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes written should be run_container_size_in_bytes(container). - */ -int32_t run_container_write(const run_container_t *container, char *buf); - -/** - * Reads the instance from buf, outputs how many bytes were read. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes read should be bitset_container_size_in_bytes(container). 
- * The cardinality parameter is provided for consistency with other containers, - * but - * it might be effectively ignored.. - */ -int32_t run_container_read(int32_t cardinality, run_container_t *container, - const char *buf); - -/** - * Return the serialized size in bytes of a container (see run_container_write). - * This is meant to be compatible with the Java and Go versions of Roaring. - */ -static inline int32_t run_container_size_in_bytes( - const run_container_t *container) { - return run_container_serialized_size_in_bytes(container->n_runs); -} - -/** - * Return true if the two containers have the same content. - */ -static inline bool run_container_equals(const run_container_t *container1, - const run_container_t *container2) { - if (container1->n_runs != container2->n_runs) { - return false; - } - return memequals(container1->runs, container2->runs, - container1->n_runs * sizeof(rle16_t)); -} - -/** -* Return true if container1 is a subset of container2. -*/ -bool run_container_is_subset(const run_container_t *container1, - const run_container_t *container2); - -/** - * Used in a start-finish scan that appends segments, for XOR and NOT - */ - -void run_container_smart_append_exclusive(run_container_t *src, - const uint16_t start, - const uint16_t length); - -/** -* The new container consists of a single run [start,stop). -* It is required that stop>start, the caller is responsability for this check. -* It is required that stop <= (1<<16), the caller is responsability for this check. -* The cardinality of the created container is stop - start. 
-* Returns NULL on failure -*/ -static inline run_container_t *run_container_create_range(uint32_t start, - uint32_t stop) { - run_container_t *rc = run_container_create_given_capacity(1); - if (rc) { - rle16_t r; - r.value = (uint16_t)start; - r.length = (uint16_t)(stop - start - 1); - run_container_append_first(rc, r); - } - return rc; -} - -/** - * If the element of given rank is in this container, supposing that the first - * element has rank start_rank, then the function returns true and sets element - * accordingly. - * Otherwise, it returns false and update start_rank. - */ -bool run_container_select(const run_container_t *container, - uint32_t *start_rank, uint32_t rank, - uint32_t *element); - -/* Compute the difference of src_1 and src_2 and write the result to - * dst. It is assumed that dst is distinct from both src_1 and src_2. */ - -void run_container_andnot(const run_container_t *src_1, - const run_container_t *src_2, run_container_t *dst); - -/* Returns the smallest value (assumes not empty) */ -inline uint16_t run_container_minimum(const run_container_t *run) { - if (run->n_runs == 0) return 0; - return run->runs[0].value; -} - -/* Returns the largest value (assumes not empty) */ -inline uint16_t run_container_maximum(const run_container_t *run) { - if (run->n_runs == 0) return 0; - return run->runs[run->n_runs - 1].value + run->runs[run->n_runs - 1].length; -} - -/* Returns the number of values equal or smaller than x */ -int run_container_rank(const run_container_t *arr, uint16_t x); - -/* Returns the index of the first run containing a value at least as large as x, or -1 */ -inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x) { - int32_t index = interleavedBinarySearch(arr->runs, arr->n_runs, x); - if (index >= 0) return index; - index = -index - 2; // points to preceding run, possibly -1 - if (index != -1) { // possible match - int32_t offset = x - arr->runs[index].value; - int32_t le = arr->runs[index].length; 
- if (offset <= le) return index; - } - index += 1; - if(index < arr->n_runs) { - return index; - } - return -1; -} - -/* - * Add all values in range [min, max] using hint. - */ -static inline void run_container_add_range_nruns(run_container_t* run, - uint32_t min, uint32_t max, - int32_t nruns_less, - int32_t nruns_greater) { - int32_t nruns_common = run->n_runs - nruns_less - nruns_greater; - if (nruns_common == 0) { - makeRoomAtIndex(run, nruns_less); - run->runs[nruns_less].value = min; - run->runs[nruns_less].length = max - min; - } else { - uint32_t common_min = run->runs[nruns_less].value; - uint32_t common_max = run->runs[nruns_less + nruns_common - 1].value + - run->runs[nruns_less + nruns_common - 1].length; - uint32_t result_min = (common_min < min) ? common_min : min; - uint32_t result_max = (common_max > max) ? common_max : max; - - run->runs[nruns_less].value = result_min; - run->runs[nruns_less].length = result_max - result_min; - - memmove(&(run->runs[nruns_less + 1]), - &(run->runs[run->n_runs - nruns_greater]), - nruns_greater*sizeof(rle16_t)); - run->n_runs = nruns_less + 1 + nruns_greater; - } -} - -/** - * Add all values in range [min, max] - */ -static inline void run_container_add_range(run_container_t* run, - uint32_t min, uint32_t max) { - int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max); - int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min); - run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater); -} - -/** - * Shifts last $count elements either left (distance < 0) or right (distance > 0) - */ -static inline void run_container_shift_tail(run_container_t* run, - int32_t count, int32_t distance) { - if (distance > 0) { - if (run->capacity < count+distance) { - run_container_grow(run, count+distance, true); - } - } - int32_t srcpos = run->n_runs - count; - int32_t dstpos = srcpos + distance; - memmove(&(run->runs[dstpos]), &(run->runs[srcpos]), sizeof(rle16_t) * count); 
- run->n_runs += distance; -} - -/** - * Remove all elements in range [min, max] - */ -static inline void run_container_remove_range(run_container_t *run, uint32_t min, uint32_t max) { - int32_t first = rle16_find_run(run->runs, run->n_runs, min); - int32_t last = rle16_find_run(run->runs, run->n_runs, max); - - if (first >= 0 && min > run->runs[first].value && - max < ((uint32_t)run->runs[first].value + (uint32_t)run->runs[first].length)) { - // split this run into two adjacent runs - - // right subinterval - makeRoomAtIndex(run, first+1); - run->runs[first+1].value = max + 1; - run->runs[first+1].length = (run->runs[first].value + run->runs[first].length) - (max + 1); - - // left subinterval - run->runs[first].length = (min - 1) - run->runs[first].value; - - return; - } - - // update left-most partial run - if (first >= 0) { - if (min > run->runs[first].value) { - run->runs[first].length = (min - 1) - run->runs[first].value; - first++; - } - } else { - first = -first-1; - } - - // update right-most run - if (last >= 0) { - uint16_t run_max = run->runs[last].value + run->runs[last].length; - if (run_max > max) { - run->runs[last].value = max + 1; - run->runs[last].length = run_max - (max + 1); - last--; - } - } else { - last = (-last-1) - 1; - } - - // remove intermediate runs - if (first <= last) { - run_container_shift_tail(run, run->n_runs - (last+1), -(last-first+1)); - } -} - - -#endif /* INCLUDE_CONTAINERS_RUN_H_ */ -/* end file include/roaring/containers/run.h */ -/* begin file include/roaring/containers/convert.h */ -/* - * convert.h - * - */ - -#ifndef INCLUDE_CONTAINERS_CONVERT_H_ -#define INCLUDE_CONTAINERS_CONVERT_H_ - - -/* Convert an array into a bitset. The input container is not freed or modified. - */ -bitset_container_t *bitset_container_from_array(const array_container_t *arr); - -/* Convert a run into a bitset. The input container is not freed or modified. 
*/ -bitset_container_t *bitset_container_from_run(const run_container_t *arr); - -/* Convert a run into an array. The input container is not freed or modified. */ -array_container_t *array_container_from_run(const run_container_t *arr); - -/* Convert a bitset into an array. The input container is not freed or modified. - */ -array_container_t *array_container_from_bitset(const bitset_container_t *bits); - -/* Convert an array into a run. The input container is not freed or modified. - */ -run_container_t *run_container_from_array(const array_container_t *c); - -/* convert a run into either an array or a bitset - * might free the container. This does not free the input run container. */ -void *convert_to_bitset_or_array_container(run_container_t *r, int32_t card, - uint8_t *resulttype); - -/* convert containers to and from runcontainers, as is most space efficient. - * The container might be freed. */ -void *convert_run_optimize(void *c, uint8_t typecode_original, - uint8_t *typecode_after); - -/* converts a run container to either an array or a bitset, IF it saves space. - */ -/* If a conversion occurs, the caller is responsible to free the original - * container and - * he becomes reponsible to free the new one. */ -void *convert_run_to_efficient_container(run_container_t *c, - uint8_t *typecode_after); -// like convert_run_to_efficient_container but frees the old result if needed -void *convert_run_to_efficient_container_and_free(run_container_t *c, - uint8_t *typecode_after); - -/** - * Create new bitset container which is a union of run container and - * range [min, max]. Caller is responsible for freeing run container. 
- */ -bitset_container_t *bitset_container_from_run_range(const run_container_t *run, - uint32_t min, uint32_t max); - -#endif /* INCLUDE_CONTAINERS_CONVERT_H_ */ -/* end file include/roaring/containers/convert.h */ -/* begin file include/roaring/containers/mixed_equal.h */ -/* - * mixed_equal.h - * - */ - -#ifndef CONTAINERS_MIXED_EQUAL_H_ -#define CONTAINERS_MIXED_EQUAL_H_ - - -/** - * Return true if the two containers have the same content. - */ -bool array_container_equal_bitset(const array_container_t* container1, - const bitset_container_t* container2); - -/** - * Return true if the two containers have the same content. - */ -bool run_container_equals_array(const run_container_t* container1, - const array_container_t* container2); -/** - * Return true if the two containers have the same content. - */ -bool run_container_equals_bitset(const run_container_t* container1, - const bitset_container_t* container2); - -#endif /* CONTAINERS_MIXED_EQUAL_H_ */ -/* end file include/roaring/containers/mixed_equal.h */ -/* begin file include/roaring/containers/mixed_subset.h */ -/* - * mixed_subset.h - * - */ - -#ifndef CONTAINERS_MIXED_SUBSET_H_ -#define CONTAINERS_MIXED_SUBSET_H_ - - -/** - * Return true if container1 is a subset of container2. - */ -bool array_container_is_subset_bitset(const array_container_t* container1, - const bitset_container_t* container2); - -/** -* Return true if container1 is a subset of container2. - */ -bool run_container_is_subset_array(const run_container_t* container1, - const array_container_t* container2); - -/** -* Return true if container1 is a subset of container2. - */ -bool array_container_is_subset_run(const array_container_t* container1, - const run_container_t* container2); - -/** -* Return true if container1 is a subset of container2. - */ -bool run_container_is_subset_bitset(const run_container_t* container1, - const bitset_container_t* container2); - -/** -* Return true if container1 is a subset of container2. 
-*/ -bool bitset_container_is_subset_run(const bitset_container_t* container1, - const run_container_t* container2); - -#endif /* CONTAINERS_MIXED_SUBSET_H_ */ -/* end file include/roaring/containers/mixed_subset.h */ -/* begin file include/roaring/containers/mixed_andnot.h */ -/* - * mixed_andnot.h - */ -#ifndef INCLUDE_CONTAINERS_MIXED_ANDNOT_H_ -#define INCLUDE_CONTAINERS_MIXED_ANDNOT_H_ - - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst, a valid array container that could be the same as dst.*/ -void array_bitset_container_andnot(const array_container_t *src_1, - const bitset_container_t *src_2, - array_container_t *dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * src_1 */ - -void array_bitset_container_iandnot(array_container_t *src_1, - const bitset_container_t *src_2); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst, which does not initially have a valid container. - * Return true for a bitset result; false for array - */ - -bool bitset_array_container_andnot(const bitset_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_array_container_iandnot(bitset_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. 
- */ - -bool run_bitset_container_andnot(const run_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool run_bitset_container_iandnot(run_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool bitset_run_container_andnot(const bitset_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_run_container_iandnot(bitset_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* dst does not indicate a valid container initially. Eventually it - * can become any type of container. - */ - -int run_array_container_andnot(const run_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. 
- * Returns true iff dst is a bitset */ - -int run_array_container_iandnot(run_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* dst must be a valid array container, allowed to be src_1 */ - -void array_run_container_andnot(const array_container_t *src_1, - const run_container_t *src_2, - array_container_t *dst); - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -void array_run_container_iandnot(array_container_t *src_1, - const run_container_t *src_2); - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int run_run_container_andnot(const run_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -int run_run_container_iandnot(run_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* - * dst is a valid array container and may be the same as src_1 - */ - -void array_array_container_andnot(const array_container_t *src_1, - const array_container_t *src_2, - array_container_t *dst); - -/* inplace array-array andnot will always be able to reuse the space of - * src_1 */ -void array_array_container_iandnot(array_container_t *src_1, - const array_container_t *src_2); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). 
Return value is - * "dst is a bitset" - */ - -bool bitset_bitset_container_andnot(const bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst); - -/* Compute the andnot of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_bitset_container_iandnot(bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst); -#endif -/* end file include/roaring/containers/mixed_andnot.h */ -/* begin file include/roaring/containers/mixed_intersection.h */ -/* - * mixed_intersection.h - * - */ - -#ifndef INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ -#define INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ - -/* These functions appear to exclude cases where the - * inputs have the same type and the output is guaranteed - * to have the same type as the inputs. Eg, array intersection - */ - - -/* Compute the intersection of src_1 and src_2 and write the result to - * dst. It is allowed for dst to be equal to src_1. We assume that dst is a - * valid container. */ -void array_bitset_container_intersection(const array_container_t *src_1, - const bitset_container_t *src_2, - array_container_t *dst); - -/* Compute the size of the intersection of src_1 and src_2. */ -int array_bitset_container_intersection_cardinality( - const array_container_t *src_1, const bitset_container_t *src_2); - - - -/* Checking whether src_1 and src_2 intersect. */ -bool array_bitset_container_intersect(const array_container_t *src_1, - const bitset_container_t *src_2); - -/* - * Compute the intersection between src_1 and src_2 and write the result - * to *dst. If the return function is true, the result is a bitset_container_t - * otherwise is a array_container_t. We assume that dst is not pre-allocated. 
In - * case of failure, *dst will be NULL. - */ -bool bitset_bitset_container_intersection(const bitset_container_t *src_1, - const bitset_container_t *src_2, - void **dst); - -/* Compute the intersection between src_1 and src_2 and write the result to - * dst. It is allowed for dst to be equal to src_1. We assume that dst is a - * valid container. */ -void array_run_container_intersection(const array_container_t *src_1, - const run_container_t *src_2, - array_container_t *dst); - -/* Compute the intersection between src_1 and src_2 and write the result to - * *dst. If the result is true then the result is a bitset_container_t - * otherwise is a array_container_t. - * If *dst == src_2, then an in-place intersection is attempted - **/ -bool run_bitset_container_intersection(const run_container_t *src_1, - const bitset_container_t *src_2, - void **dst); - -/* Compute the size of the intersection between src_1 and src_2 . */ -int array_run_container_intersection_cardinality(const array_container_t *src_1, - const run_container_t *src_2); - -/* Compute the size of the intersection between src_1 and src_2 - **/ -int run_bitset_container_intersection_cardinality(const run_container_t *src_1, - const bitset_container_t *src_2); - - -/* Check that src_1 and src_2 intersect. */ -bool array_run_container_intersect(const array_container_t *src_1, - const run_container_t *src_2); - -/* Check that src_1 and src_2 intersect. - **/ -bool run_bitset_container_intersect(const run_container_t *src_1, - const bitset_container_t *src_2); - -/* - * Same as bitset_bitset_container_intersection except that if the output is to - * be a - * bitset_container_t, then src_1 is modified and no allocation is made. - * If the output is to be an array_container_t, then caller is responsible - * to free the container. - * In all cases, the result is in *dst. 
- */ -bool bitset_bitset_container_intersection_inplace( - bitset_container_t *src_1, const bitset_container_t *src_2, void **dst); - -#endif /* INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ */ -/* end file include/roaring/containers/mixed_intersection.h */ -/* begin file include/roaring/containers/mixed_negation.h */ -/* - * mixed_negation.h - * - */ - -#ifndef INCLUDE_CONTAINERS_MIXED_NEGATION_H_ -#define INCLUDE_CONTAINERS_MIXED_NEGATION_H_ - - -/* Negation across the entire range of the container. - * Compute the negation of src and write the result - * to *dst. The complement of a - * sufficiently sparse set will always be dense and a hence a bitmap - * We assume that dst is pre-allocated and a valid bitset container - * There can be no in-place version. - */ -void array_container_negation(const array_container_t *src, - bitset_container_t *dst); - -/* Negation across the entire range of the container - * Compute the negation of src and write the result - * to *dst. A true return value indicates a bitset result, - * otherwise the result is an array container. - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool bitset_container_negation(const bitset_container_t *src, void **dst); - -/* inplace version */ -/* - * Same as bitset_container_negation except that if the output is to - * be a - * bitset_container_t, then src is modified and no allocation is made. - * If the output is to be an array_container_t, then caller is responsible - * to free the container. - * In all cases, the result is in *dst. - */ -bool bitset_container_negation_inplace(bitset_container_t *src, void **dst); - -/* Negation across the entire range of container - * Compute the negation of src and write the result - * to *dst. - * Return values are the *_TYPECODES as defined * in containers.h - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. 
- */ -int run_container_negation(const run_container_t *src, void **dst); - -/* - * Same as run_container_negation except that if the output is to - * be a - * run_container_t, and has the capacity to hold the result, - * then src is modified and no allocation is made. - * In all cases, the result is in *dst. - */ -int run_container_negation_inplace(run_container_t *src, void **dst); - -/* Negation across a range of the container. - * Compute the negation of src and write the result - * to *dst. Returns true if the result is a bitset container - * and false for an array container. *dst is not preallocated. - */ -bool array_container_negation_range(const array_container_t *src, - const int range_start, const int range_end, - void **dst); - -/* Even when the result would fit, it is unclear how to make an - * inplace version without inefficient copying. Thus this routine - * may be a wrapper for the non-in-place version - */ -bool array_container_negation_range_inplace(array_container_t *src, - const int range_start, - const int range_end, void **dst); - -/* Negation across a range of the container - * Compute the negation of src and write the result - * to *dst. A true return value indicates a bitset result, - * otherwise the result is an array container. - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool bitset_container_negation_range(const bitset_container_t *src, - const int range_start, const int range_end, - void **dst); - -/* inplace version */ -/* - * Same as bitset_container_negation except that if the output is to - * be a - * bitset_container_t, then src is modified and no allocation is made. - * If the output is to be an array_container_t, then caller is responsible - * to free the container. - * In all cases, the result is in *dst. 
- */ -bool bitset_container_negation_range_inplace(bitset_container_t *src, - const int range_start, - const int range_end, void **dst); - -/* Negation across a range of container - * Compute the negation of src and write the result - * to *dst. Return values are the *_TYPECODES as defined * in containers.h - * We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -int run_container_negation_range(const run_container_t *src, - const int range_start, const int range_end, - void **dst); - -/* - * Same as run_container_negation except that if the output is to - * be a - * run_container_t, and has the capacity to hold the result, - * then src is modified and no allocation is made. - * In all cases, the result is in *dst. - */ -int run_container_negation_range_inplace(run_container_t *src, - const int range_start, - const int range_end, void **dst); - -#endif /* INCLUDE_CONTAINERS_MIXED_NEGATION_H_ */ -/* end file include/roaring/containers/mixed_negation.h */ -/* begin file include/roaring/containers/mixed_union.h */ -/* - * mixed_intersection.h - * - */ - -#ifndef INCLUDE_CONTAINERS_MIXED_UNION_H_ -#define INCLUDE_CONTAINERS_MIXED_UNION_H_ - -/* These functions appear to exclude cases where the - * inputs have the same type and the output is guaranteed - * to have the same type as the inputs. Eg, bitset unions - */ - - -/* Compute the union of src_1 and src_2 and write the result to - * dst. It is allowed for src_2 to be dst. */ -void array_bitset_container_union(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Compute the union of src_1 and src_2 and write the result to - * dst. It is allowed for src_2 to be dst. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). 
*/ -void array_bitset_container_lazy_union(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* - * Compute the union between src_1 and src_2 and write the result - * to *dst. If the return function is true, the result is a bitset_container_t - * otherwise is a array_container_t. We assume that dst is not pre-allocated. In - * case of failure, *dst will be NULL. - */ -bool array_array_container_union(const array_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* - * Compute the union between src_1 and src_2 and write the result - * to *dst if it cannot be written to src_1. If the return function is true, - * the result is a bitset_container_t - * otherwise is a array_container_t. When the result is an array_container_t, it - * it either written to src_1 (if *dst is null) or to *dst. - * If the result is a bitset_container_t and *dst is null, then there was a failure. - */ -bool array_array_container_inplace_union(array_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* - * Same as array_array_container_union except that it will more eagerly produce - * a bitset. - */ -bool array_array_container_lazy_union(const array_container_t *src_1, - const array_container_t *src_2, - void **dst); - -/* - * Same as array_array_container_inplace_union except that it will more eagerly produce - * a bitset. - */ -bool array_array_container_lazy_inplace_union(array_container_t *src_1, - const array_container_t *src_2, - void **dst); - -/* Compute the union of src_1 and src_2 and write the result to - * dst. We assume that dst is a - * valid container. The result might need to be further converted to array or - * bitset container, - * the caller is responsible for the eventual conversion. */ -void array_run_container_union(const array_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst); - -/* Compute the union of src_1 and src_2 and write the result to - * src2. 
The result might need to be further converted to array or - * bitset container, - * the caller is responsible for the eventual conversion. */ -void array_run_container_inplace_union(const array_container_t *src_1, - run_container_t *src_2); - -/* Compute the union of src_1 and src_2 and write the result to - * dst. It is allowed for dst to be src_2. - * If run_container_is_full(src_1) is true, you must not be calling this - *function. - **/ -void run_bitset_container_union(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* Compute the union of src_1 and src_2 and write the result to - * dst. It is allowed for dst to be src_2. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). - * If run_container_is_full(src_1) is true, you must not be calling this - * function. - * */ -void run_bitset_container_lazy_union(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -#endif /* INCLUDE_CONTAINERS_MIXED_UNION_H_ */ -/* end file include/roaring/containers/mixed_union.h */ -/* begin file include/roaring/containers/mixed_xor.h */ -/* - * mixed_xor.h - * - */ - -#ifndef INCLUDE_CONTAINERS_MIXED_XOR_H_ -#define INCLUDE_CONTAINERS_MIXED_XOR_H_ - -/* These functions appear to exclude cases where the - * inputs have the same type and the output is guaranteed - * to have the same type as the inputs. Eg, bitset unions - */ - -/* - * Java implementation (as of May 2016) for array_run, run_run - * and bitset_run don't do anything different for inplace. - * (They are not truly in place.) - */ - - - -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). - * Result is true iff dst is a bitset */ -bool array_bitset_container_xor(const array_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* Compute the xor of src_1 and src_2 and write the result to - * dst. 
It is allowed for src_2 to be dst. This version does not - * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). - */ - -void array_bitset_container_lazy_xor(const array_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). Return value is - * "dst is a bitset" - */ - -bool bitset_bitset_container_xor(const bitset_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* Compute the xor of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. - */ - -bool run_bitset_container_xor(const run_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* lazy xor. Dst is initialized and may be equal to src_2. - * Result is left as a bitset container, even if actual - * cardinality would dictate an array container. - */ - -void run_bitset_container_lazy_xor(const run_container_t *src_1, - const bitset_container_t *src_2, - bitset_container_t *dst); - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int array_run_container_xor(const array_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* dst does not initially have a valid container. Creates either - * an array or a bitset container, indicated by return code - */ - -bool array_array_container_xor(const array_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* dst does not initially have a valid container. Creates either - * an array or a bitset container, indicated by return code. 
- * A bitset container will not have a valid cardinality and the - * container type might not be correct for the actual cardinality - */ - -bool array_array_container_lazy_xor(const array_container_t *src_1, - const array_container_t *src_2, void **dst); - -/* Dst is a valid run container. (Can it be src_2? Let's say not.) - * Leaves result as run container, even if other options are - * smaller. - */ - -void array_run_container_lazy_xor(const array_container_t *src_1, - const run_container_t *src_2, - run_container_t *dst); - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int run_run_container_xor(const run_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* INPLACE versions (initial implementation may not exploit all inplace - * opportunities (if any...) - */ - -/* Compute the xor of src_1 and src_2 and write the result to - * dst (which has no container initially). It will modify src_1 - * to be dst if the result is a bitset. Otherwise, it will - * free src_1 and dst will be a new array container. In both - * cases, the caller is responsible for deallocating dst. - * Returns true iff dst is a bitset */ - -bool bitset_array_container_ixor(bitset_container_t *src_1, - const array_container_t *src_2, void **dst); - -bool bitset_bitset_container_ixor(bitset_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -bool array_bitset_container_ixor(array_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -/* Compute the xor of src_1 and src_2 and write the result to - * dst. Result may be either a bitset or an array container - * (returns "result is bitset"). dst does not initially have - * any container, but becomes either a bitset container (return - * result true) or an array container. 
- */ - -bool run_bitset_container_ixor(run_container_t *src_1, - const bitset_container_t *src_2, void **dst); - -bool bitset_run_container_ixor(bitset_container_t *src_1, - const run_container_t *src_2, void **dst); - -/* dst does not indicate a valid container initially. Eventually it - * can become any kind of container. - */ - -int array_run_container_ixor(array_container_t *src_1, - const run_container_t *src_2, void **dst); - -int run_array_container_ixor(run_container_t *src_1, - const array_container_t *src_2, void **dst); - -bool array_array_container_ixor(array_container_t *src_1, - const array_container_t *src_2, void **dst); - -int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2, - void **dst); -#endif -/* end file include/roaring/containers/mixed_xor.h */ -/* begin file include/roaring/containers/containers.h */ -#ifndef CONTAINERS_CONTAINERS_H -#define CONTAINERS_CONTAINERS_H - -#include -#include -#include - - -// would enum be possible or better? - -/** - * The switch case statements follow - * BITSET_CONTAINER_TYPE_CODE -- ARRAY_CONTAINER_TYPE_CODE -- - * RUN_CONTAINER_TYPE_CODE - * so it makes more sense to number them 1, 2, 3 (in the vague hope that the - * compiler might exploit this ordering). - */ - -#define BITSET_CONTAINER_TYPE_CODE 1 -#define ARRAY_CONTAINER_TYPE_CODE 2 -#define RUN_CONTAINER_TYPE_CODE 3 -#define SHARED_CONTAINER_TYPE_CODE 4 - -// macro for pairing container type codes -#define CONTAINER_PAIR(c1, c2) (4 * (c1) + (c2)) - -/** - * A shared container is a wrapper around a container - * with reference counting. - */ - -struct shared_container_s { - void *container; - uint8_t typecode; - uint32_t counter; // to be managed atomically -}; - -typedef struct shared_container_s shared_container_t; - -/* - * With copy_on_write = true - * Create a new shared container if the typecode is not SHARED_CONTAINER_TYPE, - * otherwise, increase the count - * If copy_on_write = false, then clone. 
- * Return NULL in case of failure. - **/ -void *get_copy_of_container(void *container, uint8_t *typecode, - bool copy_on_write); - -/* Frees a shared container (actually decrement its counter and only frees when - * the counter falls to zero). */ -void shared_container_free(shared_container_t *container); - -/* extract a copy from the shared container, freeing the shared container if -there is just one instance left, -clone instances when the counter is higher than one -*/ -void *shared_container_extract_copy(shared_container_t *container, - uint8_t *typecode); - -/* access to container underneath */ -inline const void *container_unwrap_shared( - const void *candidate_shared_container, uint8_t *type) { - if (*type == SHARED_CONTAINER_TYPE_CODE) { - *type = - ((const shared_container_t *)candidate_shared_container)->typecode; - assert(*type != SHARED_CONTAINER_TYPE_CODE); - return ((const shared_container_t *)candidate_shared_container)->container; - } else { - return candidate_shared_container; - } -} - - -/* access to container underneath */ -inline void *container_mutable_unwrap_shared( - void *candidate_shared_container, uint8_t *type) { - if (*type == SHARED_CONTAINER_TYPE_CODE) { - *type = - ((shared_container_t *)candidate_shared_container)->typecode; - assert(*type != SHARED_CONTAINER_TYPE_CODE); - return ((shared_container_t *)candidate_shared_container)->container; - } else { - return candidate_shared_container; - } -} - -/* access to container underneath and queries its type */ -static inline uint8_t get_container_type(const void *container, uint8_t type) { - if (type == SHARED_CONTAINER_TYPE_CODE) { - return ((const shared_container_t *)container)->typecode; - } else { - return type; - } -} - -/** - * Copies a container, requires a typecode. This allocates new memory, caller - * is responsible for deallocation. If the container is not shared, then it is - * physically cloned. Sharable containers are not cloneable. 
- */ -void *container_clone(const void *container, uint8_t typecode); - -/* access to container underneath, cloning it if needed */ -static inline void *get_writable_copy_if_shared( - void *candidate_shared_container, uint8_t *type) { - if (*type == SHARED_CONTAINER_TYPE_CODE) { - return shared_container_extract_copy( - (shared_container_t *)candidate_shared_container, type); - } else { - return candidate_shared_container; - } -} - -/** - * End of shared container code - */ - -static const char *container_names[] = {"bitset", "array", "run", "shared"}; -static const char *shared_container_names[] = { - "bitset (shared)", "array (shared)", "run (shared)"}; - -// no matter what the initial container was, convert it to a bitset -// if a new container is produced, caller responsible for freeing the previous -// one -// container should not be a shared container -static inline void *container_to_bitset(void *container, uint8_t typecode) { - bitset_container_t *result = NULL; - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return container; // nothing to do - case ARRAY_CONTAINER_TYPE_CODE: - result = - bitset_container_from_array((array_container_t *)container); - return result; - case RUN_CONTAINER_TYPE_CODE: - result = bitset_container_from_run((run_container_t *)container); - return result; - case SHARED_CONTAINER_TYPE_CODE: - assert(false); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * Get the container name from the typecode - */ -static inline const char *get_container_name(uint8_t typecode) { - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return container_names[0]; - case ARRAY_CONTAINER_TYPE_CODE: - return container_names[1]; - case RUN_CONTAINER_TYPE_CODE: - return container_names[2]; - case SHARED_CONTAINER_TYPE_CODE: - return container_names[3]; - default: - assert(false); - __builtin_unreachable(); - return "unknown"; - } -} - -static inline const char *get_full_container_name(const void 
*container, - uint8_t typecode) { - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return container_names[0]; - case ARRAY_CONTAINER_TYPE_CODE: - return container_names[1]; - case RUN_CONTAINER_TYPE_CODE: - return container_names[2]; - case SHARED_CONTAINER_TYPE_CODE: - switch (((const shared_container_t *)container)->typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return shared_container_names[0]; - case ARRAY_CONTAINER_TYPE_CODE: - return shared_container_names[1]; - case RUN_CONTAINER_TYPE_CODE: - return shared_container_names[2]; - default: - assert(false); - __builtin_unreachable(); - return "unknown"; - } - break; - default: - assert(false); - __builtin_unreachable(); - return "unknown"; - } - __builtin_unreachable(); - return NULL; -} - -/** - * Get the container cardinality (number of elements), requires a typecode - */ -static inline int container_get_cardinality(const void *container, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_cardinality( - (const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_cardinality( - (const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_cardinality( - (const run_container_t *)container); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - - - -// returns true if a container is known to be full. 
Note that a lazy bitset -// container -// might be full without us knowing -static inline bool container_is_full(const void *container, uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_cardinality( - (const bitset_container_t *)container) == (1 << 16); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_cardinality( - (const array_container_t *)container) == (1 << 16); - case RUN_CONTAINER_TYPE_CODE: - return run_container_is_full((const run_container_t *)container); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -static inline int container_shrink_to_fit(void *container, uint8_t typecode) { - container = container_mutable_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return 0; // no shrinking possible - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_shrink_to_fit( - (array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_shrink_to_fit((run_container_t *)container); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - - -/** - * make a container with a run of ones - */ -/* initially always use a run container, even if an array might be - * marginally - * smaller */ -static inline void *container_range_of_ones(uint32_t range_start, - uint32_t range_end, - uint8_t *result_type) { - assert(range_end >= range_start); - uint64_t cardinality = range_end - range_start + 1; - if(cardinality <= 2) { - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return array_container_create_range(range_start, range_end); - } else { - *result_type = RUN_CONTAINER_TYPE_CODE; - return run_container_create_range(range_start, range_end); - } -} - - -/* Create a container with all the values between in [min,max) at a - distance k*step from min. 
*/ -static inline void *container_from_range(uint8_t *type, uint32_t min, - uint32_t max, uint16_t step) { - if (step == 0) return NULL; // being paranoid - if (step == 1) { - return container_range_of_ones(min,max,type); - // Note: the result is not always a run (need to check the cardinality) - //*type = RUN_CONTAINER_TYPE_CODE; - //return run_container_create_range(min, max); - } - int size = (max - min + step - 1) / step; - if (size <= DEFAULT_MAX_SIZE) { // array container - *type = ARRAY_CONTAINER_TYPE_CODE; - array_container_t *array = array_container_create_given_capacity(size); - array_container_add_from_range(array, min, max, step); - assert(array->cardinality == size); - return array; - } else { // bitset container - *type = BITSET_CONTAINER_TYPE_CODE; - bitset_container_t *bitset = bitset_container_create(); - bitset_container_add_from_range(bitset, min, max, step); - assert(bitset->cardinality == size); - return bitset; - } -} - -/** - * "repair" the container after lazy operations. 
- */ -static inline void *container_repair_after_lazy(void *container, - uint8_t *typecode) { - container = get_writable_copy_if_shared( - container, typecode); // TODO: this introduces unnecessary cloning - void *result = NULL; - switch (*typecode) { - case BITSET_CONTAINER_TYPE_CODE: - ((bitset_container_t *)container)->cardinality = - bitset_container_compute_cardinality( - (bitset_container_t *)container); - if (((bitset_container_t *)container)->cardinality <= - DEFAULT_MAX_SIZE) { - result = array_container_from_bitset( - (const bitset_container_t *)container); - bitset_container_free((bitset_container_t *)container); - *typecode = ARRAY_CONTAINER_TYPE_CODE; - return result; - } - return container; - case ARRAY_CONTAINER_TYPE_CODE: - return container; // nothing to do - case RUN_CONTAINER_TYPE_CODE: - return convert_run_to_efficient_container_and_free( - (run_container_t *)container, typecode); - case SHARED_CONTAINER_TYPE_CODE: - assert(false); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * Writes the underlying array to buf, outputs how many bytes were written. - * This is meant to be byte-by-byte compatible with the Java and Go versions of - * Roaring. - * The number of bytes written should be - * container_write(container, buf). 
- * - */ -static inline int32_t container_write(const void *container, uint8_t typecode, - char *buf) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_write((const bitset_container_t *)container, buf); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_write((const array_container_t *)container, buf); - case RUN_CONTAINER_TYPE_CODE: - return run_container_write((const run_container_t *)container, buf); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * Get the container size in bytes under portable serialization (see - * container_write), requires a - * typecode - */ -static inline int32_t container_size_in_bytes(const void *container, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_size_in_bytes( - (const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_size_in_bytes( - (const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_size_in_bytes((const run_container_t *)container); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * print the container (useful for debugging), requires a typecode - */ -void container_printf(const void *container, uint8_t typecode); - -/** - * print the content of the container as a comma-separated list of 32-bit values - * starting at base, requires a typecode - */ -void container_printf_as_uint32_array(const void *container, uint8_t typecode, - uint32_t base); - -/** - * Checks whether a container is not empty, requires a typecode - */ -static inline bool container_nonzero_cardinality(const void *container, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return 
bitset_container_const_nonzero_cardinality( - (const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_nonzero_cardinality( - (const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_nonzero_cardinality( - (const run_container_t *)container); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * Recover memory from a container, requires a typecode - */ -void container_free(void *container, uint8_t typecode); - -/** - * Convert a container to an array of values, requires a typecode as well as a - * "base" (most significant values) - * Returns number of ints added. - */ -static inline int container_to_uint32_array(uint32_t *output, - const void *container, - uint8_t typecode, uint32_t base) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_to_uint32_array( - output, (const bitset_container_t *)container, base); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_to_uint32_array( - output, (const array_container_t *)container, base); - case RUN_CONTAINER_TYPE_CODE: - return run_container_to_uint32_array( - output, (const run_container_t *)container, base); - } - assert(false); - __builtin_unreachable(); - return 0; // unreached -} - -/** - * Add a value to a container, requires a typecode, fills in new_typecode and - * return (possibly different) container. 
- * This function may allocate a new container, and caller is responsible for - * memory deallocation - */ -static inline void *container_add(void *container, uint16_t val, - uint8_t typecode, uint8_t *new_typecode) { - container = get_writable_copy_if_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - bitset_container_set((bitset_container_t *)container, val); - *new_typecode = BITSET_CONTAINER_TYPE_CODE; - return container; - case ARRAY_CONTAINER_TYPE_CODE: { - array_container_t *ac = (array_container_t *)container; - if (array_container_try_add(ac, val, DEFAULT_MAX_SIZE) != -1) { - *new_typecode = ARRAY_CONTAINER_TYPE_CODE; - return ac; - } else { - bitset_container_t* bitset = bitset_container_from_array(ac); - bitset_container_add(bitset, val); - *new_typecode = BITSET_CONTAINER_TYPE_CODE; - return bitset; - } - } break; - case RUN_CONTAINER_TYPE_CODE: - // per Java, no container type adjustments are done (revisit?) - run_container_add((run_container_t *)container, val); - *new_typecode = RUN_CONTAINER_TYPE_CODE; - return container; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Remove a value from a container, requires a typecode, fills in new_typecode - * and - * return (possibly different) container. 
- * This function may allocate a new container, and caller is responsible for - * memory deallocation - */ -static inline void *container_remove(void *container, uint16_t val, - uint8_t typecode, uint8_t *new_typecode) { - container = get_writable_copy_if_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - if (bitset_container_remove((bitset_container_t *)container, val)) { - if (bitset_container_cardinality( - (bitset_container_t *)container) <= DEFAULT_MAX_SIZE) { - *new_typecode = ARRAY_CONTAINER_TYPE_CODE; - return array_container_from_bitset( - (bitset_container_t *)container); - } - } - *new_typecode = typecode; - return container; - case ARRAY_CONTAINER_TYPE_CODE: - *new_typecode = typecode; - array_container_remove((array_container_t *)container, val); - return container; - case RUN_CONTAINER_TYPE_CODE: - // per Java, no container type adjustments are done (revisit?) - run_container_remove((run_container_t *)container, val); - *new_typecode = RUN_CONTAINER_TYPE_CODE; - return container; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Check whether a value is in a container, requires a typecode - */ -inline bool container_contains(const void *container, uint16_t val, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_get((const bitset_container_t *)container, - val); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_contains( - (const array_container_t *)container, val); - case RUN_CONTAINER_TYPE_CODE: - return run_container_contains((const run_container_t *)container, - val); - default: - assert(false); - __builtin_unreachable(); - return false; - } -} - -/** - * Check whether a range of values from range_start (included) to range_end (excluded) - * is in a container, requires a typecode - */ -static inline bool container_contains_range(const void *container, 
uint32_t range_start, - uint32_t range_end, uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_get_range((const bitset_container_t *)container, - range_start, range_end); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_contains_range((const array_container_t *)container, - range_start, range_end); - case RUN_CONTAINER_TYPE_CODE: - return run_container_contains_range((const run_container_t *)container, - range_start, range_end); - default: - assert(false); - __builtin_unreachable(); - return false; - } -} - -int32_t container_serialize(const void *container, uint8_t typecode, - char *buf) WARN_UNUSED; - -uint32_t container_serialization_len(const void *container, uint8_t typecode); - -void *container_deserialize(uint8_t typecode, const char *buf, size_t buf_len); - -/** - * Returns true if the two containers have the same content. Note that - * two containers having different types can be "equal" in this sense. - */ -static inline bool container_equals(const void *c1, uint8_t type1, - const void *c2, uint8_t type2) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return bitset_container_equals((const bitset_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - return run_container_equals_bitset((const run_container_t *)c2, - (const bitset_container_t *)c1); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return run_container_equals_bitset((const run_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - // java would always return false? 
- return array_container_equal_bitset((const array_container_t *)c2, - (const bitset_container_t *)c1); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - // java would always return false? - return array_container_equal_bitset((const array_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return run_container_equals_array((const run_container_t *)c2, - (const array_container_t *)c1); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - return run_container_equals_array((const run_container_t *)c1, - (const array_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_container_equals((const array_container_t *)c1, - (const array_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return run_container_equals((const run_container_t *)c1, - (const run_container_t *)c2); - default: - assert(false); - __builtin_unreachable(); - return false; - } -} - -/** - * Returns true if the container c1 is a subset of the container c2. Note that - * c1 can be a subset of c2 even if they have a different type. 
- */ -static inline bool container_is_subset(const void *c1, uint8_t type1, - const void *c2, uint8_t type2) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return bitset_container_is_subset((const bitset_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - return bitset_container_is_subset_run((const bitset_container_t *)c1, - (const run_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return run_container_is_subset_bitset((const run_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return false; // by construction, size(c1) > size(c2) - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return array_container_is_subset_bitset((const array_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return array_container_is_subset_run((const array_container_t *)c1, - (const run_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - return run_container_is_subset_array((const run_container_t *)c1, - (const array_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_container_is_subset((const array_container_t *)c1, - (const array_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return run_container_is_subset((const run_container_t *)c1, - (const run_container_t *)c2); - default: - assert(false); - __builtin_unreachable(); - return false; - } -} - -// macro-izations possibilities for generic non-inplace binary-op dispatch - -/** - * Compute 
intersection between two containers, generate a new container (having - * type result_type), requires a typecode. This allocates new memory, caller - * is responsible for deallocation. - */ -static inline void *container_and(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = bitset_bitset_container_intersection( - (const bitset_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = array_container_create(); - array_container_intersection((const array_container_t *)c1, - (const array_container_t *)c2, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - run_container_intersection((const run_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - return convert_run_to_efficient_container_and_free( - (run_container_t *)result, result_type); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = array_container_create(); - array_bitset_container_intersection((const array_container_t *)c2, - (const bitset_container_t *)c1, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_bitset_container_intersection((const array_container_t *)c1, - (const 
bitset_container_t *)c2, - (array_container_t *)result); - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_intersection( - (const run_container_t *)c2, - (const bitset_container_t *)c1, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_intersection( - (const run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_run_container_intersection((const array_container_t *)c1, - (const run_container_t *)c2, - (array_container_t *)result); - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_run_container_intersection((const array_container_t *)c2, - (const run_container_t *)c1, - (array_container_t *)result); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Compute the size of the intersection between two containers. 
- */ -static inline int container_and_cardinality(const void *c1, uint8_t type1, - const void *c2, uint8_t type2) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return bitset_container_and_justcard( - (const bitset_container_t *)c1, (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_container_intersection_cardinality( - (const array_container_t *)c1, (const array_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return run_container_intersection_cardinality( - (const run_container_t *)c1, (const run_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_bitset_container_intersection_cardinality( - (const array_container_t *)c2, (const bitset_container_t *)c1); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return array_bitset_container_intersection_cardinality( - (const array_container_t *)c1, (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - return run_bitset_container_intersection_cardinality( - (const run_container_t *)c2, (const bitset_container_t *)c1); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return run_bitset_container_intersection_cardinality( - (const run_container_t *)c1, (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return array_run_container_intersection_cardinality( - (const array_container_t *)c1, (const run_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - return array_run_container_intersection_cardinality( - (const array_container_t *)c2, (const run_container_t 
*)c1); - default: - assert(false); - __builtin_unreachable(); - return 0; - } -} - -/** - * Check whether two containers intersect. - */ -static inline bool container_intersect(const void *c1, uint8_t type1, const void *c2, - uint8_t type2) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return bitset_container_intersect( - (const bitset_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_container_intersect((const array_container_t *)c1, - (const array_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return run_container_intersect((const run_container_t *)c1, - (const run_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - return array_bitset_container_intersect((const array_container_t *)c2, - (const bitset_container_t *)c1); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return array_bitset_container_intersect((const array_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - return run_bitset_container_intersect( - (const run_container_t *)c2, - (const bitset_container_t *)c1); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - return run_bitset_container_intersect( - (const run_container_t *)c1, - (const bitset_container_t *)c2); - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - return array_run_container_intersect((const array_container_t *)c1, - (const run_container_t *)c2); - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - return array_run_container_intersect((const array_container_t *)c2, - (const run_container_t 
*)c1); - default: - assert(false); - __builtin_unreachable(); - return 0; - } -} - -/** - * Compute intersection between two containers, with result in the first - container if possible. If the returned pointer is identical to c1, - then the container has been modified. If the returned pointer is different - from c1, then a new container has been created and the caller is responsible - for freeing it. - The type of the first container may change. Returns the modified - (and possibly new) container. -*/ -static inline void *container_iand(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = get_writable_copy_if_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = - bitset_bitset_container_intersection_inplace( - (bitset_container_t *)c1, (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - array_container_intersection_inplace((array_container_t *)c1, - (const array_container_t *)c2); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - run_container_intersection((const run_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - // as of January 2016, Java code used non-in-place intersection for - // two runcontainers - return convert_run_to_efficient_container_and_free( - (run_container_t *)result, result_type); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - // c1 is a bitmap so no inplace possible - result = array_container_create(); - array_bitset_container_intersection((const array_container_t *)c2, - (const bitset_container_t *)c1, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_bitset_container_intersection( - (const array_container_t *)c1, (const bitset_container_t *)c2, - (array_container_t *)c1); // allowed - return c1; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - // will attempt in-place computation - *result_type = run_bitset_container_intersection( - (const run_container_t *)c2, - (const bitset_container_t *)c1, &c1) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_intersection( - (const run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_run_container_intersection((const array_container_t *)c1, - (const run_container_t *)c2, - (array_container_t *)result); - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; // never bitset - array_run_container_intersection((const array_container_t *)c2, - (const run_container_t *)c1, - (array_container_t *)result); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Compute union between two containers, generate a new container (having type - * result_type), requires a typecode. This allocates new memory, caller - * is responsible for deallocation. - */ -static inline void *container_or(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - bitset_container_or((const bitset_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_union( - (const array_container_t *)c1, - (const array_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - run_container_union((const run_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // todo: could be optimized since will never convert to array - result = convert_run_to_efficient_container_and_free( - (run_container_t *)result, (uint8_t *)result_type); - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - array_bitset_container_union((const array_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - array_bitset_container_union((const array_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c2, - (run_container_t *)result); - return result; - } - result = bitset_container_create(); - run_bitset_container_union((const run_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c1)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c1, - (run_container_t *)result); - return result; 
- } - result = bitset_container_create(); - run_bitset_container_union((const run_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union((const array_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - result = convert_run_to_efficient_container_and_free( - (run_container_t *)result, (uint8_t *)result_type); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union((const array_container_t *)c2, - (const run_container_t *)c1, - (run_container_t *)result); - result = convert_run_to_efficient_container_and_free( - (run_container_t *)result, (uint8_t *)result_type); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; // unreached - } -} - -/** - * Compute union between two containers, generate a new container (having type - * result_type), requires a typecode. This allocates new memory, caller - * is responsible for deallocation. - * - * This lazy version delays some operations such as the maintenance of the - * cardinality. It requires repair later on the generated containers. 
- */ -static inline void *container_lazy_or(const void *c1, uint8_t type1, - const void *c2, uint8_t type2, - uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - bitset_container_or_nocard( - (const bitset_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_lazy_union( - (const array_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - run_container_union((const run_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // we are being lazy - result = convert_run_to_efficient_container( - (run_container_t *)result, result_type); - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - array_bitset_container_lazy_union( - (const array_container_t *)c2, (const bitset_container_t *)c1, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - array_bitset_container_lazy_union( - (const array_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - 
RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c2, - (run_container_t *)result); - return result; - } - result = bitset_container_create(); - run_bitset_container_lazy_union( - (const run_container_t *)c2, (const bitset_container_t *)c1, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c1)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c1, - (run_container_t *)result); - return result; - } - result = bitset_container_create(); - run_bitset_container_lazy_union( - (const run_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union((const array_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container(result, result_type); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union( - (const array_container_t *)c2, (const run_container_t *)c1, - (run_container_t *)result); // TODO make lazy - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container(result, result_type); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; // unreached - } -} - -/** - * Compute the 
union between two containers, with result in the first container. - * If the returned pointer is identical to c1, then the container has been - * modified. - * If the returned pointer is different from c1, then a new container has been - * created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container -*/ -static inline void *container_ior(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = get_writable_copy_if_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - bitset_container_or((const bitset_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)c1); -#ifdef OR_BITSET_CONVERSION_TO_FULL - if (((bitset_container_t *)c1)->cardinality == - (1 << 16)) { // we convert - result = run_container_create_range(0, (1 << 16)); - *result_type = RUN_CONTAINER_TYPE_CODE; - return result; - } -#endif - *result_type = BITSET_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_inplace_union( - (array_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - if((result == NULL) - && (*result_type == ARRAY_CONTAINER_TYPE_CODE)) { - return c1; // the computation was done in-place! 
- } - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - run_container_union_inplace((run_container_t *)c1, - (const run_container_t *)c2); - return convert_run_to_efficient_container((run_container_t *)c1, - result_type); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - array_bitset_container_union((const array_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)c1); - *result_type = BITSET_CONTAINER_TYPE_CODE; // never array - return c1; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - // c1 is an array, so no in-place possible - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_bitset_container_union((const array_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c2, - (run_container_t *)result); - return result; - } - run_bitset_container_union((const run_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)c1); // allowed - *result_type = BITSET_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c1)) { - *result_type = RUN_CONTAINER_TYPE_CODE; - - return c1; - } - result = bitset_container_create(); - run_bitset_container_union((const run_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union((const array_container_t *)c1, - 
(const run_container_t *)c2, - (run_container_t *)result); - result = convert_run_to_efficient_container_and_free( - (run_container_t *)result, result_type); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - array_run_container_inplace_union((const array_container_t *)c2, - (run_container_t *)c1); - c1 = convert_run_to_efficient_container((run_container_t *)c1, - result_type); - return c1; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Compute the union between two containers, with result in the first container. - * If the returned pointer is identical to c1, then the container has been - * modified. - * If the returned pointer is different from c1, then a new container has been - * created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container - * - * This lazy version delays some operations such as the maintenance of the - * cardinality. It requires repair later on the generated containers. 
-*/ -static inline void *container_lazy_ior(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - assert(type1 != SHARED_CONTAINER_TYPE_CODE); - // c1 = get_writable_copy_if_shared(c1,&type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): -#ifdef LAZY_OR_BITSET_CONVERSION_TO_FULL - // if we have two bitsets, we might as well compute the cardinality - bitset_container_or((const bitset_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)c1); - // it is possible that two bitsets can lead to a full container - if (((bitset_container_t *)c1)->cardinality == - (1 << 16)) { // we convert - result = run_container_create_range(0, (1 << 16)); - *result_type = RUN_CONTAINER_TYPE_CODE; - return result; - } -#else - bitset_container_or_nocard((const bitset_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)c1); - -#endif - *result_type = BITSET_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_lazy_inplace_union( - (array_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - if((result == NULL) - && (*result_type == ARRAY_CONTAINER_TYPE_CODE)) { - return c1; // the computation was done in-place! 
- } - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - run_container_union_inplace((run_container_t *)c1, - (const run_container_t *)c2); - *result_type = RUN_CONTAINER_TYPE_CODE; - return convert_run_to_efficient_container((run_container_t *)c1, - result_type); - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - array_bitset_container_lazy_union( - (const array_container_t *)c2, (const bitset_container_t *)c1, - (bitset_container_t *)c1); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; // never array - return c1; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - // c1 is an array, so no in-place possible - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_bitset_container_lazy_union( - (const array_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // is lazy - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = run_container_create(); - *result_type = RUN_CONTAINER_TYPE_CODE; - run_container_copy((const run_container_t *)c2, - (run_container_t *)result); - return result; - } - run_bitset_container_lazy_union( - (const run_container_t *)c2, (const bitset_container_t *)c1, - (bitset_container_t *)c1); // allowed // lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return c1; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c1)) { - *result_type = RUN_CONTAINER_TYPE_CODE; - return c1; - } - result = bitset_container_create(); - run_bitset_container_lazy_union( - (const run_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, 
RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_union((const array_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container_and_free(result, - // result_type); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - array_run_container_inplace_union((const array_container_t *)c2, - (run_container_t *)c1); - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container_and_free(result, - // result_type); - return c1; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Compute symmetric difference (xor) between two containers, generate a new - * container (having type result_type), requires a typecode. This allocates new - * memory, caller is responsible for deallocation. - */ -static inline void *container_xor(const void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = bitset_bitset_container_xor( - (const bitset_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_xor( - (const array_container_t *)c1, - (const array_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = - run_run_container_xor((const run_container_t *)c1, - (const run_container_t *)c2, &result); - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_bitset_container_xor( - (const array_container_t *)c2, - (const bitset_container_t *)c1, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = array_bitset_container_xor( - (const array_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_xor( - (const run_container_t *)c2, - (const bitset_container_t *)c1, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - - *result_type = run_bitset_container_xor( - (const run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = - array_run_container_xor((const array_container_t *)c1, - (const run_container_t *)c2, &result); - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - *result_type = - array_run_container_xor((const array_container_t *)c2, - (const run_container_t *)c1, &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - return NULL; // unreached - } -} - -/** - * Compute xor between two containers, generate a new container (having type - * result_type), requires a typecode. This allocates new memory, caller - * is responsible for deallocation. - * - * This lazy version delays some operations such as the maintenance of the - * cardinality. It requires repair later on the generated containers. - */ -static inline void *container_lazy_xor(const void *c1, uint8_t type1, - const void *c2, uint8_t type2, - uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - bitset_container_xor_nocard( - (const bitset_container_t *)c1, (const bitset_container_t *)c2, - (bitset_container_t *)result); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_lazy_xor( - (const array_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - // nothing special done yet. 
- *result_type = - run_run_container_xor((const run_container_t *)c1, - (const run_container_t *)c2, &result); - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_bitset_container_lazy_xor((const array_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)result); - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_bitset_container_lazy_xor((const array_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - run_bitset_container_lazy_xor((const run_container_t *)c2, - (const bitset_container_t *)c1, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = bitset_container_create(); - run_bitset_container_lazy_xor((const run_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)result); - *result_type = BITSET_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_lazy_xor((const array_container_t *)c1, - (const run_container_t *)c2, - (run_container_t *)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container(result, result_type); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - result = run_container_create(); - array_run_container_lazy_xor((const array_container_t *)c2, - (const run_container_t *)c1, - (run_container_t 
*)result); - *result_type = RUN_CONTAINER_TYPE_CODE; - // next line skipped since we are lazy - // result = convert_run_to_efficient_container(result, result_type); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; // unreached - } -} - -/** - * Compute the xor between two containers, with result in the first container. - * If the returned pointer is identical to c1, then the container has been - * modified. - * If the returned pointer is different from c1, then a new container has been - * created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container -*/ -static inline void *container_ixor(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = get_writable_copy_if_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = bitset_bitset_container_ixor( - (bitset_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = array_array_container_ixor( - (array_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = run_run_container_ixor( - (run_container_t *)c1, (const run_container_t *)c2, &result); - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = bitset_array_container_ixor( - (bitset_container_t *)c1, - (const array_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = array_bitset_container_ixor( - (array_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - *result_type = - bitset_run_container_ixor((bitset_container_t *)c1, - (const run_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_ixor( - (run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - return result; - - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = array_run_container_ixor( - (array_container_t *)c1, (const run_container_t *)c2, &result); - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - *result_type = run_array_container_ixor( - (run_container_t *)c1, (const array_container_t *)c2, &result); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Compute the xor between two containers, with result in the first container. - * If the returned pointer is identical to c1, then the container has been - * modified. - * If the returned pointer is different from c1, then a new container has been - * created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container - * - * This lazy version delays some operations such as the maintenance of the - * cardinality. It requires repair later on the generated containers. 
-*/ -static inline void *container_lazy_ixor(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - assert(type1 != SHARED_CONTAINER_TYPE_CODE); - // c1 = get_writable_copy_if_shared(c1,&type1); - c2 = container_unwrap_shared(c2, &type2); - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - bitset_container_xor_nocard((bitset_container_t *)c1, - (const bitset_container_t *)c2, - (bitset_container_t *)c1); // is lazy - *result_type = BITSET_CONTAINER_TYPE_CODE; - return c1; - // TODO: other cases being lazy, esp. when we know inplace not likely - // could see the corresponding code for union - default: - // we may have a dirty bitset (without a precomputed cardinality) and - // calling container_ixor on it might be unsafe. - if( (type1 == BITSET_CONTAINER_TYPE_CODE) - && (((const bitset_container_t *)c1)->cardinality == BITSET_UNKNOWN_CARDINALITY)) { - ((bitset_container_t *)c1)->cardinality = bitset_container_compute_cardinality((bitset_container_t *)c1); - } - return container_ixor(c1, type1, c2, type2, result_type); - } -} - -/** - * Compute difference (andnot) between two containers, generate a new - * container (having type result_type), requires a typecode. This allocates new - * memory, caller is responsible for deallocation. - */ -static inline void *container_andnot(const void *c1, uint8_t type1, - const void *c2, uint8_t type2, - uint8_t *result_type) { - c1 = container_unwrap_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = bitset_bitset_container_andnot( - (const bitset_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - result = array_container_create(); - array_array_container_andnot((const array_container_t *)c1, - (const array_container_t *)c2, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - } - *result_type = - run_run_container_andnot((const run_container_t *)c1, - (const run_container_t *)c2, &result); - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = bitset_array_container_andnot( - (const bitset_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - result = array_container_create(); - array_bitset_container_andnot((const array_container_t *)c1, - (const bitset_container_t *)c2, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - } - *result_type = bitset_run_container_andnot( - (const bitset_container_t *)c1, - (const run_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - - *result_type = run_bitset_container_andnot( - (const run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - if (run_container_is_full((const run_container_t *)c2)) { - result = array_container_create(); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - } - result = array_container_create(); - array_run_container_andnot((const array_container_t *)c1, - (const run_container_t *)c2, - (array_container_t *)result); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - *result_type = run_array_container_andnot( - (const run_container_t *)c1, (const array_container_t *)c2, - &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - return NULL; // unreached - } -} - -/** - * Compute the andnot between two containers, with result in the first - * container. - * If the returned pointer is identical to c1, then the container has been - * modified. - * If the returned pointer is different from c1, then a new container has been - * created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container -*/ -static inline void *container_iandnot(void *c1, uint8_t type1, const void *c2, - uint8_t type2, uint8_t *result_type) { - c1 = get_writable_copy_if_shared(c1, &type1); - c2 = container_unwrap_shared(c2, &type2); - void *result = NULL; - switch (CONTAINER_PAIR(type1, type2)) { - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = bitset_bitset_container_iandnot( - (bitset_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - array_array_container_iandnot((array_container_t *)c1, - (const array_container_t *)c2); - *result_type = ARRAY_CONTAINER_TYPE_CODE; - return c1; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = run_run_container_iandnot( - (run_container_t *)c1, (const run_container_t *)c2, &result); - return result; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - ARRAY_CONTAINER_TYPE_CODE): - *result_type = bitset_array_container_iandnot( - (bitset_container_t *)c1, - (const array_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = ARRAY_CONTAINER_TYPE_CODE; - - array_bitset_container_iandnot((array_container_t *)c1, - (const bitset_container_t *)c2); - return c1; - - case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, - RUN_CONTAINER_TYPE_CODE): - *result_type = bitset_run_container_iandnot( - (bitset_container_t *)c1, - (const run_container_t *)c2, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - return result; - - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, - BITSET_CONTAINER_TYPE_CODE): - *result_type = run_bitset_container_iandnot( - (run_container_t *)c1, - (const bitset_container_t *)c2, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - - return result; - - case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): - *result_type = ARRAY_CONTAINER_TYPE_CODE; - array_run_container_iandnot((array_container_t *)c1, - (const run_container_t *)c2); - return c1; - case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): - *result_type = run_array_container_iandnot( - (run_container_t *)c1, (const array_container_t *)c2, &result); - return result; - default: - assert(false); - __builtin_unreachable(); - return NULL; - } -} - -/** - * Visit all values x of the container once, passing (base+x,ptr) - * to iterator. You need to specify a container and its type. - * Returns true if the iteration should continue. - */ -static inline bool container_iterate(const void *container, uint8_t typecode, - uint32_t base, roaring_iterator iterator, - void *ptr) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_iterate( - (const bitset_container_t *)container, base, iterator, ptr); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_iterate((const array_container_t *)container, - base, iterator, ptr); - case RUN_CONTAINER_TYPE_CODE: - return run_container_iterate((const run_container_t *)container, - base, iterator, ptr); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -static inline bool container_iterate64(const void *container, uint8_t typecode, - uint32_t base, - roaring_iterator64 iterator, - uint64_t high_bits, void *ptr) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_iterate64( - (const bitset_container_t *)container, base, iterator, - high_bits, ptr); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_iterate64( - (const array_container_t 
*)container, base, iterator, high_bits, - ptr); - case RUN_CONTAINER_TYPE_CODE: - return run_container_iterate64((const run_container_t *)container, - base, iterator, high_bits, ptr); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -static inline void *container_not(const void *c, uint8_t typ, - uint8_t *result_type) { - c = container_unwrap_shared(c, &typ); - void *result = NULL; - switch (typ) { - case BITSET_CONTAINER_TYPE_CODE: - *result_type = bitset_container_negation( - (const bitset_container_t *)c, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case ARRAY_CONTAINER_TYPE_CODE: - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_container_negation((const array_container_t *)c, - (bitset_container_t *)result); - return result; - case RUN_CONTAINER_TYPE_CODE: - *result_type = - run_container_negation((const run_container_t *)c, &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return NULL; -} - -static inline void *container_not_range(const void *c, uint8_t typ, - uint32_t range_start, - uint32_t range_end, - uint8_t *result_type) { - c = container_unwrap_shared(c, &typ); - void *result = NULL; - switch (typ) { - case BITSET_CONTAINER_TYPE_CODE: - *result_type = - bitset_container_negation_range((const bitset_container_t *)c, - range_start, range_end, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case ARRAY_CONTAINER_TYPE_CODE: - *result_type = - array_container_negation_range((const array_container_t *)c, - range_start, range_end, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case RUN_CONTAINER_TYPE_CODE: - *result_type = run_container_negation_range( - (const run_container_t *)c, range_start, range_end, &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return NULL; -} - -static inline void *container_inot(void *c, uint8_t typ, uint8_t *result_type) { - c = get_writable_copy_if_shared(c, &typ); - void *result = NULL; - switch (typ) { - case BITSET_CONTAINER_TYPE_CODE: - *result_type = bitset_container_negation_inplace( - (bitset_container_t *)c, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case ARRAY_CONTAINER_TYPE_CODE: - // will never be inplace - result = bitset_container_create(); - *result_type = BITSET_CONTAINER_TYPE_CODE; - array_container_negation((array_container_t *)c, - (bitset_container_t *)result); - array_container_free((array_container_t *)c); - return result; - case RUN_CONTAINER_TYPE_CODE: - *result_type = - run_container_negation_inplace((run_container_t *)c, &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return NULL; -} - -static inline void *container_inot_range(void *c, uint8_t typ, - uint32_t range_start, - uint32_t range_end, - uint8_t *result_type) { - c = get_writable_copy_if_shared(c, &typ); - void *result = NULL; - switch (typ) { - case BITSET_CONTAINER_TYPE_CODE: - *result_type = - bitset_container_negation_range_inplace( - (bitset_container_t *)c, range_start, range_end, &result) - ? BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case ARRAY_CONTAINER_TYPE_CODE: - *result_type = - array_container_negation_range_inplace( - (array_container_t *)c, range_start, range_end, &result) - ? 
BITSET_CONTAINER_TYPE_CODE - : ARRAY_CONTAINER_TYPE_CODE; - return result; - case RUN_CONTAINER_TYPE_CODE: - *result_type = run_container_negation_range_inplace( - (run_container_t *)c, range_start, range_end, &result); - return result; - - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return NULL; -} - -/** - * If the element of given rank is in this container, supposing that - * the first - * element has rank start_rank, then the function returns true and - * sets element - * accordingly. - * Otherwise, it returns false and update start_rank. - */ -static inline bool container_select(const void *container, uint8_t typecode, - uint32_t *start_rank, uint32_t rank, - uint32_t *element) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_select((const bitset_container_t *)container, - start_rank, rank, element); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_select((const array_container_t *)container, - start_rank, rank, element); - case RUN_CONTAINER_TYPE_CODE: - return run_container_select((const run_container_t *)container, - start_rank, rank, element); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -static inline uint16_t container_maximum(const void *container, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_maximum((const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_maximum((const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_maximum((const run_container_t *)container); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -static inline uint16_t 
container_minimum(const void *container, - uint8_t typecode) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_minimum((const bitset_container_t *)container); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_minimum((const array_container_t *)container); - case RUN_CONTAINER_TYPE_CODE: - return run_container_minimum((const run_container_t *)container); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -// number of values smaller or equal to x -static inline int container_rank(const void *container, uint8_t typecode, - uint16_t x) { - container = container_unwrap_shared(container, &typecode); - switch (typecode) { - case BITSET_CONTAINER_TYPE_CODE: - return bitset_container_rank((const bitset_container_t *)container, x); - case ARRAY_CONTAINER_TYPE_CODE: - return array_container_rank((const array_container_t *)container, x); - case RUN_CONTAINER_TYPE_CODE: - return run_container_rank((const run_container_t *)container, x); - default: - assert(false); - __builtin_unreachable(); - } - assert(false); - __builtin_unreachable(); - return false; -} - -/** - * Add all values in range [min, max] to a given container. - * - * If the returned pointer is different from $container, then a new container - * has been created and the caller is responsible for freeing it. - * The type of the first container may change. Returns the modified - * (and possibly new) container. 
- */ -static inline void *container_add_range(void *container, uint8_t type, - uint32_t min, uint32_t max, - uint8_t *result_type) { - // NB: when selecting new container type, we perform only inexpensive checks - switch (type) { - case BITSET_CONTAINER_TYPE_CODE: { - bitset_container_t *bitset = (bitset_container_t *) container; - - int32_t union_cardinality = 0; - union_cardinality += bitset->cardinality; - union_cardinality += max - min + 1; - union_cardinality -= bitset_lenrange_cardinality(bitset->array, min, max-min); - - if (union_cardinality == INT32_C(0x10000)) { - *result_type = RUN_CONTAINER_TYPE_CODE; - return run_container_create_range(0, INT32_C(0x10000)); - } else { - *result_type = BITSET_CONTAINER_TYPE_CODE; - bitset_set_lenrange(bitset->array, min, max - min); - bitset->cardinality = union_cardinality; - return bitset; - } - } - case ARRAY_CONTAINER_TYPE_CODE: { - array_container_t *array = (array_container_t *) container; - - int32_t nvals_greater = count_greater(array->array, array->cardinality, max); - int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min); - int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater; - - if (union_cardinality == INT32_C(0x10000)) { - *result_type = RUN_CONTAINER_TYPE_CODE; - return run_container_create_range(0, INT32_C(0x10000)); - } else if (union_cardinality <= DEFAULT_MAX_SIZE) { - *result_type = ARRAY_CONTAINER_TYPE_CODE; - array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater); - return array; - } else { - *result_type = BITSET_CONTAINER_TYPE_CODE; - bitset_container_t *bitset = bitset_container_from_array(array); - bitset_set_lenrange(bitset->array, min, max - min); - bitset->cardinality = union_cardinality; - return bitset; - } - } - case RUN_CONTAINER_TYPE_CODE: { - run_container_t *run = (run_container_t *) container; - - int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max); - int32_t nruns_less = 
rle16_count_less(run->runs, run->n_runs - nruns_greater, min); - - int32_t run_size_bytes = (nruns_less + 1 + nruns_greater) * sizeof(rle16_t); - int32_t bitset_size_bytes = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); - - if (run_size_bytes <= bitset_size_bytes) { - run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater); - *result_type = RUN_CONTAINER_TYPE_CODE; - return run; - } else { - *result_type = BITSET_CONTAINER_TYPE_CODE; - return bitset_container_from_run_range(run, min, max); - } - } - default: - __builtin_unreachable(); - } -} - -/* - * Removes all elements in range [min, max]. - * Returns one of: - * - NULL if no elements left - * - pointer to the original container - * - pointer to a newly-allocated container (if it is more efficient) - * - * If the returned pointer is different from $container, then a new container - * has been created and the caller is responsible for freeing the original container. - */ -static inline void *container_remove_range(void *container, uint8_t type, - uint32_t min, uint32_t max, - uint8_t *result_type) { - switch (type) { - case BITSET_CONTAINER_TYPE_CODE: { - bitset_container_t *bitset = (bitset_container_t *) container; - - int32_t result_cardinality = bitset->cardinality - - bitset_lenrange_cardinality(bitset->array, min, max-min); - - if (result_cardinality == 0) { - return NULL; - } else if (result_cardinality < DEFAULT_MAX_SIZE) { - *result_type = ARRAY_CONTAINER_TYPE_CODE; - bitset_reset_range(bitset->array, min, max+1); - bitset->cardinality = result_cardinality; - return array_container_from_bitset(bitset); - } else { - *result_type = BITSET_CONTAINER_TYPE_CODE; - bitset_reset_range(bitset->array, min, max+1); - bitset->cardinality = result_cardinality; - return bitset; - } - } - case ARRAY_CONTAINER_TYPE_CODE: { - array_container_t *array = (array_container_t *) container; - - int32_t nvals_greater = count_greater(array->array, array->cardinality, max); - int32_t nvals_less = 
count_less(array->array, array->cardinality - nvals_greater, min); - int32_t result_cardinality = nvals_less + nvals_greater; - - if (result_cardinality == 0) { - return NULL; - } else { - *result_type = ARRAY_CONTAINER_TYPE_CODE; - array_container_remove_range(array, nvals_less, - array->cardinality - result_cardinality); - return array; - } - } - case RUN_CONTAINER_TYPE_CODE: { - run_container_t *run = (run_container_t *) container; - - if (run->n_runs == 0) { - return NULL; - } - if (min <= run_container_minimum(run) && max >= run_container_maximum(run)) { - return NULL; - } - - run_container_remove_range(run, min, max); - - if (run_container_serialized_size_in_bytes(run->n_runs) <= - bitset_container_serialized_size_in_bytes()) { - *result_type = RUN_CONTAINER_TYPE_CODE; - return run; - } else { - *result_type = BITSET_CONTAINER_TYPE_CODE; - return bitset_container_from_run(run); - } - } - default: - __builtin_unreachable(); - } -} - -#endif -/* end file include/roaring/containers/containers.h */ -/* begin file include/roaring/roaring_array.h */ -#ifndef INCLUDE_ROARING_ARRAY_H -#define INCLUDE_ROARING_ARRAY_H -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include - -#define MAX_CONTAINERS 65536 - -#define SERIALIZATION_ARRAY_UINT32 1 -#define SERIALIZATION_CONTAINER 2 - -#define ROARING_FLAG_COW UINT8_C(0x1) -#define ROARING_FLAG_FROZEN UINT8_C(0x2) - -enum { - SERIAL_COOKIE_NO_RUNCONTAINER = 12346, - SERIAL_COOKIE = 12347, - FROZEN_COOKIE = 13766, - NO_OFFSET_THRESHOLD = 4 -}; - -/** - * Roaring arrays are array-based key-value pairs having containers as values - * and 16-bit integer keys. A roaring bitmap might be implemented as such. - */ - -// parallel arrays. Element sizes quite different. -// Alternative is array -// of structs. Which would have better -// cache performance through binary searches? 
- -typedef struct roaring_array_s { - int32_t size; - int32_t allocation_size; - void **containers; - uint16_t *keys; - uint8_t *typecodes; - uint8_t flags; -} roaring_array_t; - -/** - * Create a new roaring array - */ -roaring_array_t *ra_create(void); - -/** - * Initialize an existing roaring array with the specified capacity (in number - * of containers) - */ -bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap); - -/** - * Initialize with zero capacity - */ -void ra_init(roaring_array_t *t); - -/** - * Copies this roaring array, we assume that dest is not initialized - */ -bool ra_copy(const roaring_array_t *source, roaring_array_t *dest, - bool copy_on_write); - -/* - * Shrinks the capacity, returns the number of bytes saved. - */ -int ra_shrink_to_fit(roaring_array_t *ra); - -/** - * Copies this roaring array, we assume that dest is initialized - */ -bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, - bool copy_on_write); - -/** - * Frees the memory used by a roaring array - */ -void ra_clear(roaring_array_t *r); - -/** - * Frees the memory used by a roaring array, but does not free the containers - */ -void ra_clear_without_containers(roaring_array_t *r); - -/** - * Frees just the containers - */ -void ra_clear_containers(roaring_array_t *ra); - -/** - * Get the index corresponding to a 16-bit key - */ -inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x) { - if ((ra->size == 0) || ra->keys[ra->size - 1] == x) return ra->size - 1; - return binarySearch(ra->keys, (int32_t)ra->size, x); -} - -/** - * Retrieves the container at index i, filling in the typecode - */ -inline void *ra_get_container_at_index(const roaring_array_t *ra, uint16_t i, - uint8_t *typecode) { - *typecode = ra->typecodes[i]; - return ra->containers[i]; -} - -/** - * Retrieves the key at index i - */ -uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i); - -/** - * Add a new key-value pair at index i - */ -void 
ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key, - void *container, uint8_t typecode); - -/** - * Append a new key-value pair - */ -void ra_append(roaring_array_t *ra, uint16_t s, void *c, uint8_t typecode); - -/** - * Append a new key-value pair to ra, cloning (in COW sense) a value from sa - * at index index - */ -void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t index, bool copy_on_write); - -/** - * Append new key-value pairs to ra, cloning (in COW sense) values from sa - * at indexes - * [start_index, end_index) - */ -void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write); - -/** appends from sa to ra, ending with the greatest key that is - * is less or equal stopping_key - */ -void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t stopping_key, bool copy_on_write); - -/** appends from sa to ra, starting with the smallest key that is - * is strictly greater than before_start - */ - -void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, - uint16_t before_start, bool copy_on_write); - -/** - * Move the key-value pairs to ra from sa at indexes - * [start_index, end_index), old array should not be freed - * (use ra_clear_without_containers) - **/ -void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index); -/** - * Append new key-value pairs to ra, from sa at indexes - * [start_index, end_index) - */ -void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, - int32_t start_index, int32_t end_index, - bool copy_on_write); - -/** - * Set the container at the corresponding index using the specified - * typecode. 
- */ -inline void ra_set_container_at_index(const roaring_array_t *ra, int32_t i, - void *c, uint8_t typecode) { - assert(i < ra->size); - ra->containers[i] = c; - ra->typecodes[i] = typecode; -} - -/** - * If needed, increase the capacity of the array so that it can fit k values - * (at - * least); - */ -bool extend_array(roaring_array_t *ra, int32_t k); - -inline int32_t ra_get_size(const roaring_array_t *ra) { return ra->size; } - -static inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x, - int32_t pos) { - return advanceUntil(ra->keys, pos, ra->size, x); -} - -int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos); - -void ra_downsize(roaring_array_t *ra, int32_t new_length); - -inline void ra_replace_key_and_container_at_index(roaring_array_t *ra, - int32_t i, uint16_t key, - void *c, uint8_t typecode) { - assert(i < ra->size); - - ra->keys[i] = key; - ra->containers[i] = c; - ra->typecodes[i] = typecode; -} - -// write set bits to an array -void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans); - -bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans); - -/** - * write a bitmap to a buffer. This is meant to be compatible with - * the - * Java and Go versions. Return the size in bytes of the serialized - * output (which should be ra_portable_size_in_bytes(ra)). - */ -size_t ra_portable_serialize(const roaring_array_t *ra, char *buf); - -/** - * read a bitmap from a serialized version. This is meant to be compatible - * with the Java and Go versions. - * maxbytes indicates how many bytes available from buf. - * When the function returns true, roaring_array_t is populated with the data - * and *readbytes indicates how many bytes were read. In all cases, if the function - * returns true, then maxbytes >= *readbytes. 
- */ -bool ra_portable_deserialize(roaring_array_t *ra, const char *buf, const size_t maxbytes, size_t * readbytes); - -/** - * Quickly checks whether there is a serialized bitmap at the pointer, - * not exceeding size "maxbytes" in bytes. This function does not allocate - * memory dynamically. - * - * This function returns 0 if and only if no valid bitmap is found. - * Otherwise, it returns how many bytes are occupied by the bitmap data. - */ -size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes); - -/** - * How many bytes are required to serialize this bitmap (meant to be - * compatible - * with Java and Go versions) - */ -size_t ra_portable_size_in_bytes(const roaring_array_t *ra); - -/** - * return true if it contains at least one run container. - */ -bool ra_has_run_container(const roaring_array_t *ra); - -/** - * Size of the header when serializing (meant to be compatible - * with Java and Go versions) - */ -uint32_t ra_portable_header_size(const roaring_array_t *ra); - -/** - * If the container at the index i is share, unshare it (creating a local - * copy if needed). - */ -static inline void ra_unshare_container_at_index(roaring_array_t *ra, - uint16_t i) { - assert(i < ra->size); - ra->containers[i] = - get_writable_copy_if_shared(ra->containers[i], &ra->typecodes[i]); -} - -/** - * remove at index i, sliding over all entries after i - */ -void ra_remove_at_index(roaring_array_t *ra, int32_t i); - - -/** -* clears all containers, sets the size at 0 and shrinks the memory usage. -*/ -void ra_reset(roaring_array_t *ra); - -/** - * remove at index i, sliding over all entries after i. Free removed container. 
- */ -void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i); - -/** - * remove a chunk of indices, sliding over entries after it - */ -// void ra_remove_index_range(roaring_array_t *ra, int32_t begin, int32_t end); - -// used in inplace andNot only, to slide left the containers from -// the mutated RoaringBitmap that are after the largest container of -// the argument RoaringBitmap. It is followed by a call to resize. -// -void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, - uint32_t new_begin); - -/** - * Shifts rightmost $count containers to the left (distance < 0) or - * to the right (distance > 0). - * Allocates memory if necessary. - * This function doesn't free or create new containers. - * Caller is responsible for that. - */ -void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance); - -#ifdef __cplusplus -} -#endif - -#endif -/* end file include/roaring/roaring_array.h */ -/* begin file include/roaring/misc/configreport.h */ -/* - * configreport.h - * - */ +/* compute the intersection in-place (to b1), to generate a new bitset first + * call bitset_copy */ +void bitset_inplace_intersection(bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); -#ifndef INCLUDE_MISC_CONFIGREPORT_H_ -#define INCLUDE_MISC_CONFIGREPORT_H_ +/* report the size of the intersection (without materializing it) */ +size_t bitset_intersection_count(const bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); -#include // for size_t -#include -#include +/* returns true if the bitsets contain no common elements */ +bool bitsets_disjoint(const bitset_t *CBITSET_RESTRICT b1, const bitset_t *CBITSET_RESTRICT b2); +/* returns true if the bitsets contain any common elements */ +bool bitsets_intersect(const bitset_t *CBITSET_RESTRICT b1, const bitset_t *CBITSET_RESTRICT b2); -#ifdef IS_X64 -// useful for basic info (0) -static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, 
unsigned int *edx) { -#ifdef ROARING_INLINE_ASM - __asm volatile("cpuid" - : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) - : "0"(*eax), "2"(*ecx)); -#endif /* not sure what to do when inline assembly is unavailable*/ -} +/* returns true if b1 contains all of the set bits of b2 */ +bool bitset_contains_all(const bitset_t *CBITSET_RESTRICT b1, const bitset_t *CBITSET_RESTRICT b2); -// CPUID instruction takes no parameters as CPUID implicitly uses the EAX -// register. -// The EAX register should be loaded with a value specifying what information to -// return -static inline void cpuinfo(int code, int *eax, int *ebx, int *ecx, int *edx) { -#ifdef ROARING_INLINE_ASM - __asm__ volatile("cpuid;" // call cpuid instruction - : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), - "=d"(*edx) // output equal to "movl %%eax %1" - : "a"(code) // input equal to "movl %1, %%eax" - //:"%eax","%ebx","%ecx","%edx"// clobbered register - ); -#endif /* not sure what to do when inline assembly is unavailable*/ -} +/* compute the difference in-place (to b1), to generate a new bitset first call + * bitset_copy */ +void bitset_inplace_difference(bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); + +/* compute the size of the difference */ +size_t bitset_difference_count(const bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); + +/* compute the symmetric difference in-place (to b1), return true if successful, + * to generate a new bitset first call bitset_copy */ +bool bitset_inplace_symmetric_difference(bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); + +/* compute the size of the symmetric difference */ +size_t bitset_symmetric_difference_count(const bitset_t *CBITSET_RESTRICT b1, + const bitset_t *CBITSET_RESTRICT b2); -static inline int computecacheline() { - int eax = 0, ebx = 0, ecx = 0, edx = 0; - cpuinfo((int)0x80000006, &eax, &ebx, &ecx, &edx); - return ecx & 0xFF; +/* iterate over the set bits + like so : + for(size_t i = 0; 
bitset_next_set_bit(b,&i) ; i++) { + //..... + } + */ +inline bool bitset_next_set_bit(const bitset_t *bitset, size_t *i) { + size_t x = *i / 64; + if (x >= bitset->arraysize) { + return false; + } + uint64_t w = bitset->array[x]; + w >>= (*i & 63); + if (w != 0) { + *i += roaring_trailing_zeroes(w); + return true; + } + x++; + while (x < bitset->arraysize) { + w = bitset->array[x]; + if (w != 0) { + *i = x * 64 + roaring_trailing_zeroes(w); + return true; + } + x++; + } + return false; } -// this is quite imperfect, but can be handy -static inline const char *guessprocessor() { - unsigned eax = 1, ebx = 0, ecx = 0, edx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - const char *codename; - switch (eax >> 4) { - case 0x506E: - codename = "Skylake"; - break; - case 0x406C: - codename = "CherryTrail"; - break; - case 0x306D: - codename = "Broadwell"; - break; - case 0x306C: - codename = "Haswell"; - break; - case 0x306A: - codename = "IvyBridge"; - break; - case 0x206A: - case 0x206D: - codename = "SandyBridge"; - break; - case 0x2065: - case 0x206C: - case 0x206F: - codename = "Westmere"; - break; - case 0x106E: - case 0x106A: - case 0x206E: - codename = "Nehalem"; - break; - case 0x1067: - case 0x106D: - codename = "Penryn"; - break; - case 0x006F: - case 0x1066: - codename = "Merom"; - break; - case 0x0066: - codename = "Presler"; - break; - case 0x0063: - case 0x0064: - codename = "Prescott"; - break; - case 0x006D: - codename = "Dothan"; - break; - case 0x0366: - codename = "Cedarview"; - break; - case 0x0266: - codename = "Lincroft"; - break; - case 0x016C: - codename = "Pineview"; - break; - default: - codename = "UNKNOWN"; +/* iterate over the set bits + like so : + size_t buffer[256]; + size_t howmany = 0; + for(size_t startfrom = 0; (howmany = bitset_next_set_bits(b,buffer,256, &startfrom)) > + 0 ; startfrom++) { + //..... 
+ } + */ +inline size_t bitset_next_set_bits(const bitset_t *bitset, size_t *buffer, + size_t capacity, size_t *startfrom) { + if (capacity == 0) return 0; // sanity check + size_t x = *startfrom / 64; + if (x >= bitset->arraysize) { + return 0; // nothing more to iterate over + } + uint64_t w = bitset->array[x]; + w >>= (*startfrom & 63); + size_t howmany = 0; + size_t base = x << 6; + while (howmany < capacity) { + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = roaring_trailing_zeroes(w); + buffer[howmany++] = r + base; + if (howmany == capacity) goto end; + w ^= t; + } + x += 1; + if (x == bitset->arraysize) { break; + } + base += 64; + w = bitset->array[x]; } - return codename; + end: + if (howmany > 0) { + *startfrom = buffer[howmany - 1]; + } + return howmany; } -static inline void tellmeall() { - printf("Intel processor: %s\t", guessprocessor()); - -#ifdef __VERSION__ - printf(" compiler version: %s\t", __VERSION__); -#endif - printf("\tBuild option USEAVX "); -#ifdef USEAVX - printf("enabled\n"); -#else - printf("disabled\n"); -#endif -#ifndef __AVX2__ - printf("AVX2 is NOT available.\n"); -#endif +typedef bool (*bitset_iterator)(size_t value, void *param); - if ((sizeof(int) != 4) || (sizeof(long) != 8)) { - printf("number of bytes: int = %lu long = %lu \n", - (long unsigned int)sizeof(size_t), - (long unsigned int)sizeof(int)); +// return true if uninterrupted +inline bool bitset_for_each(const bitset_t *b, bitset_iterator iterator, + void *ptr) { + size_t base = 0; + for (size_t i = 0; i < b->arraysize; ++i) { + uint64_t w = b->array[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = roaring_trailing_zeroes(w); + if (!iterator(r + base, ptr)) return false; + w ^= t; + } + base += 64; } -#if __LITTLE_ENDIAN__ -// This is what we expect! 
-// printf("you have little endian machine"); -#endif -#if __BIG_ENDIAN__ - printf("you have a big endian machine"); -#endif -#if __CHAR_BIT__ - if (__CHAR_BIT__ != 8) printf("on your machine, chars don't have 8bits???"); -#endif - if (computecacheline() != 64) - printf("cache line: %d bytes\n", computecacheline()); + return true; } -#else -static inline void tellmeall() { - printf("Non-X64 processor\n"); -#ifdef __arm__ - printf("ARM processor detected\n"); -#endif -#ifdef __VERSION__ - printf(" compiler version: %s\t", __VERSION__); -#endif - if ((sizeof(int) != 4) || (sizeof(long) != 8)) { - printf("number of bytes: int = %lu long = %lu \n", - (long unsigned int)sizeof(size_t), - (long unsigned int)sizeof(int)); +inline void bitset_print(const bitset_t *b) { + printf("{"); + for (size_t i = 0; bitset_next_set_bit(b, &i); i++) { + printf("%zu, ", i); } -#if __LITTLE_ENDIAN__ -// This is what we expect! -// printf("you have little endian machine"); -#endif -#if __BIG_ENDIAN__ - printf("you have a big endian machine"); -#endif -#if __CHAR_BIT__ - if (__CHAR_BIT__ != 8) printf("on your machine, chars don't have 8bits???"); -#endif + printf("}"); } +#ifdef __cplusplus +} } } // extern "C" { namespace roaring { namespace api { #endif -#endif /* INCLUDE_MISC_CONFIGREPORT_H_ */ -/* end file include/roaring/misc/configreport.h */ +#endif +/* end file include/roaring/bitset/bitset.h */ /* begin file include/roaring/roaring.h */ /* -An implementation of Roaring Bitmaps in C. -*/ + * An implementation of Roaring Bitmaps in C. + */ #ifndef ROARING_H #define ROARING_H -#ifdef __cplusplus -extern "C" { -#endif #include +#include +#include // for `size_t` + + +#ifdef __cplusplus +extern "C" { namespace roaring { namespace api { +#endif typedef struct roaring_bitmap_s { roaring_array_t high_low_container; } roaring_bitmap_t; /** - * Creates a new bitmap (initially empty) + * Dynamically allocates a new bitmap (initially empty). + * Returns NULL if the allocation fails. 
+ * Capacity is a performance hint for how many "containers" the data will need. + * Client is responsible for calling `roaring_bitmap_free()`. + */ +roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap); + +/** + * Dynamically allocates a new bitmap (initially empty). + * Returns NULL if the allocation fails. + * Client is responsible for calling `roaring_bitmap_free()`. + */ +inline roaring_bitmap_t *roaring_bitmap_create(void) +{ return roaring_bitmap_create_with_capacity(0); } + +/** + * Initialize a roaring bitmap structure in memory controlled by client. + * Capacity is a performance hint for how many "containers" the data will need. + * Can return false if auxiliary allocations fail when capacity greater than 0. */ -roaring_bitmap_t *roaring_bitmap_create(void); +bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap); + +/** + * Initialize a roaring bitmap structure in memory controlled by client. + * The bitmap will be in a "clear" state, with no auxiliary allocations. + * Since this performs no allocations, the function will not fail. + */ +inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r) +{ roaring_bitmap_init_with_capacity(r, 0); } /** * Add all the values between min (included) and max (excluded) that are at a @@ -6481,12 +1052,6 @@ roaring_bitmap_t *roaring_bitmap_create(void); roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, uint32_t step); -/** - * Creates a new bitmap (initially empty) with a provided - * container-storage capacity (it is a performance hint). - */ -roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap); - /** * Creates a new bitmap from a pointer of uint32_t integers */ @@ -6494,11 +1059,12 @@ roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals); /* * Whether you want to use copy-on-write. - * Saves memory and avoids copies but needs more care in a threaded context. 
+ * Saves memory and avoids copies, but needs more care in a threaded context. * Most users should ignore this flag. - * Note: if you do turn this flag to 'true', enabling COW, - * then ensure that you do so for all of your bitmaps since - * interactions between bitmaps with and without COW is unsafe. + * + * Note: If you do turn this flag to 'true', enabling COW, then ensure that you + * do so for all of your bitmaps, since interactions between bitmaps with and + * without COW is unsafe. */ inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r) { return r->high_low_container.flags & ROARING_FLAG_COW; @@ -6511,10 +1077,12 @@ inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow) { } } +roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm, + int64_t offset); /** * Describe the inner structure of the bitmap. */ -void roaring_bitmap_printf_describe(const roaring_bitmap_t *ra); +void roaring_bitmap_printf_describe(const roaring_bitmap_t *r); /** * Creates a new bitmap from a list of uint32_t integers @@ -6522,172 +1090,169 @@ void roaring_bitmap_printf_describe(const roaring_bitmap_t *ra); roaring_bitmap_t *roaring_bitmap_of(size_t n, ...); /** - * Copies a bitmap. This does memory allocation. The caller is responsible for - * memory management. - * + * Copies a bitmap (this does memory allocation). + * The caller is responsible for memory management. */ roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r); - /** - * Copies a bitmap from src to dest. It is assumed that the pointer dest + * Copies a bitmap from src to dest. It is assumed that the pointer dest * is to an already allocated bitmap. The content of the dest bitmap is * freed/deleted. * * It might be preferable and simpler to call roaring_bitmap_copy except * that roaring_bitmap_overwrite can save on memory allocations. * + * Returns true if successful, or false if there was an error. 
On failure, + * the dest bitmap is left in a valid, empty state (even if it was not empty before). */ bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, - const roaring_bitmap_t *src); + const roaring_bitmap_t *src); /** * Print the content of the bitmap. */ -void roaring_bitmap_printf(const roaring_bitmap_t *ra); +void roaring_bitmap_printf(const roaring_bitmap_t *r); /** * Computes the intersection between two bitmaps and returns new bitmap. The - * caller is - * responsible for memory management. + * caller is responsible for memory management. * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmap. + * You may also rely on roaring_bitmap_and_inplace to avoid creating + * many temporary bitmaps. */ -roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); +roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** * Computes the size of the intersection between two bitmaps. - * */ -uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); - +uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** * Check whether two bitmaps intersect. - * */ -bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); +bool roaring_bitmap_intersect(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); + +/** + * Check whether a bitmap and a closed range intersect. + */ +bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, + uint64_t x, uint64_t y); /** * Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto - * distance, - * or the Jaccard similarity coefficient) + * distance, or the Jaccard similarity coefficient) * * The Jaccard index is undefined if both bitmaps are empty. 
- * */ -double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); +double roaring_bitmap_jaccard_index(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** * Computes the size of the union between two bitmaps. - * */ -uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); +uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** * Computes the size of the difference (andnot) between two bitmaps. - * */ -uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); +uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** - * Computes the size of the symmetric difference (andnot) between two bitmaps. - * + * Computes the size of the symmetric difference (xor) between two bitmaps. */ -uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); +uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** - * Inplace version modifies x1, x1 == x2 is allowed + * Inplace version of `roaring_bitmap_and()`, modifies r1 + * r1 == r2 is allowed. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmap. */ -void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); +void roaring_bitmap_and_inplace(roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** * Computes the union between two bitmaps and returns new bitmap. The caller is * responsible for memory management. */ -roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); +roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** - * Inplace version of roaring_bitmap_or, modifies x1. 
TDOO: decide whether x1 == - *x2 ok - * + * Inplace version of `roaring_bitmap_or(), modifies r1. + * TODO: decide whether r1 == r2 ok */ -void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); +void roaring_bitmap_or_inplace(roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** - * Compute the union of 'number' bitmaps. See also roaring_bitmap_or_many_heap. - * Caller is responsible for freeing the - * result. - * + * Compute the union of 'number' bitmaps. + * Caller is responsible for freeing the result. + * See also `roaring_bitmap_or_many_heap()` */ roaring_bitmap_t *roaring_bitmap_or_many(size_t number, - const roaring_bitmap_t **x); + const roaring_bitmap_t **rs); /** - * Compute the union of 'number' bitmaps using a heap. This can - * sometimes be faster than roaring_bitmap_or_many which uses - * a naive algorithm. Caller is responsible for freeing the - * result. - * + * Compute the union of 'number' bitmaps using a heap. This can sometimes be + * faster than `roaring_bitmap_or_many() which uses a naive algorithm. + * Caller is responsible for freeing the result. */ roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, - const roaring_bitmap_t **x); + const roaring_bitmap_t **rs); /** * Computes the symmetric difference (xor) between two bitmaps * and returns new bitmap. The caller is responsible for memory management. */ -roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); +roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** - * Inplace version of roaring_bitmap_xor, modifies x1. x1 != x2. - * + * Inplace version of roaring_bitmap_xor, modifies r1, r1 != r2. */ -void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); +void roaring_bitmap_xor_inplace(roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** * Compute the xor of 'number' bitmaps. - * Caller is responsible for freeing the - * result. 
- * + * Caller is responsible for freeing the result. */ roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, - const roaring_bitmap_t **x); + const roaring_bitmap_t **rs); /** - * Computes the difference (andnot) between two bitmaps - * and returns new bitmap. The caller is responsible for memory management. + * Computes the difference (andnot) between two bitmaps and returns new bitmap. + * Caller is responsible for freeing the result. */ -roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); +roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** - * Inplace version of roaring_bitmap_andnot, modifies x1. x1 != x2. - * + * Inplace version of roaring_bitmap_andnot, modifies r1, r1 != r2. */ -void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); +void roaring_bitmap_andnot_inplace(roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** * TODO: consider implementing: - * Compute the xor of 'number' bitmaps using a heap. This can - * sometimes be faster than roaring_bitmap_xor_many which uses - * a naive algorithm. Caller is responsible for freeing the - * result. + * + * "Compute the xor of 'number' bitmaps using a heap. This can sometimes be + * faster than roaring_bitmap_xor_many which uses a naive algorithm. Caller is + * responsible for freeing the result."" * * roaring_bitmap_t *roaring_bitmap_xor_many_heap(uint32_t number, - * const roaring_bitmap_t **x); + * const roaring_bitmap_t **rs); */ /** @@ -6695,55 +1260,101 @@ void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, */ void roaring_bitmap_free(const roaring_bitmap_t *r); +/** + * A bit of context usable with `roaring_bitmap_*_bulk()` functions + * + * Should be initialized with `{0}` (or `memset()` to all zeros). + * Callers should treat it as an opaque type. 
+ * + * A context may only be used with a single bitmap + * (unless re-initialized to zero), and any modification to a bitmap + * (other than modifications performed with `_bulk()` functions with the context + * passed) will invalidate any contexts associated with that bitmap. + */ +typedef struct roaring_bulk_context_s { + ROARING_CONTAINER_T *container; + int idx; + uint16_t key; + uint8_t typecode; +} roaring_bulk_context_t; + +/** + * Add an item, using context from a previous insert for speed optimization. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `*context` should be zero-initialized before the first + * call to this function. + * + * Modifying the bitmap in any way (other than `-bulk` suffixed functions) + * will invalidate the stored context, calling this function with a non-zero + * context after doing any modification invokes undefined behavior. + * + * In order to exploit this optimization, the caller should call this function + * with values with the same "key" (high 16 bits of the value) consecutively. + */ +void roaring_bitmap_add_bulk(roaring_bitmap_t *r, + roaring_bulk_context_t *context, uint32_t val); + /** * Add value n_args from pointer vals, faster than repeatedly calling - * roaring_bitmap_add + * `roaring_bitmap_add()` * + * In order to exploit this optimization, the caller should attempt to keep + * values with the same "key" (high 16 bits of the value) as consecutive + * elements in `vals` */ void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, const uint32_t *vals); /** * Add value x - * */ void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x); /** * Add value x - * Returns true if a new value was added, false if the value was already existing. + * Returns true if a new value was added, false if the value already existed. 
*/ bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x); /** * Add all values in range [min, max] */ -void roaring_bitmap_add_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max); +void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, + uint32_t min, uint32_t max); /** * Add all values in range [min, max) */ -inline void roaring_bitmap_add_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max) { - if(max == min) return; - roaring_bitmap_add_range_closed(ra, (uint32_t)min, (uint32_t)(max - 1)); +inline void roaring_bitmap_add_range(roaring_bitmap_t *r, + uint64_t min, uint64_t max) { + if(max <= min) return; + roaring_bitmap_add_range_closed(r, (uint32_t)min, (uint32_t)(max - 1)); } /** * Remove value x - * */ void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x); -/** Remove all values in range [min, max] */ -void roaring_bitmap_remove_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max); +/** + * Remove all values in range [min, max] + */ +void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, + uint32_t min, uint32_t max); -/** Remove all values in range [min, max) */ -inline void roaring_bitmap_remove_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max) { - if(max == min) return; - roaring_bitmap_remove_range_closed(ra, (uint32_t)min, (uint32_t)(max - 1)); +/** + * Remove all values in range [min, max) + */ +inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, + uint64_t min, uint64_t max) { + if(max <= min) return; + roaring_bitmap_remove_range_closed(r, (uint32_t)min, (uint32_t)(max - 1)); } -/** Remove multiple values */ +/** + * Remove multiple values + */ void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, const uint32_t *vals); @@ -6754,168 +1365,260 @@ void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t x); /** - * Check if value x is present + * Check if value is present */ -inline bool 
roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) { - const uint16_t hb = val >> 16; - /* - * the next function call involves a binary search and lots of branching. - */ - int32_t i = ra_get_index(&r->high_low_container, hb); - if (i < 0) return false; +bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val); - uint8_t typecode; - // next call ought to be cheap - void *container = - ra_get_container_at_index(&r->high_low_container, i, &typecode); - // rest might be a tad expensive, possibly involving another round of binary search - return container_contains(container, val & 0xFFFF, typecode); -} +/** + * Check whether a range of values from range_start (included) + * to range_end (excluded) is present + */ +bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, + uint64_t range_start, + uint64_t range_end); /** - * Check whether a range of values from range_start (included) to range_end (excluded) is present + * Check if an items is present, using context from a previous insert or search + * for speed optimization. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `*context` should be zero-initialized before the first + * call to this function. + * + * Modifying the bitmap in any way (other than `-bulk` suffixed functions) + * will invalidate the stored context, calling this function with a non-zero + * context after doing any modification invokes undefined behavior. + * + * In order to exploit this optimization, the caller should call this function + * with values with the same "key" (high 16 bits of the value) consecutively. */ -bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end); +bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r, + roaring_bulk_context_t *context, + uint32_t val); /** * Get the cardinality of the bitmap (number of elements). 
*/ -uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *ra); +uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r); /** * Returns the number of elements in the range [range_start, range_end). */ -uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *ra, - uint64_t range_start, uint64_t range_end); +uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, + uint64_t range_start, + uint64_t range_end); /** * Returns true if the bitmap is empty (cardinality is zero). */ -bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra); +bool roaring_bitmap_is_empty(const roaring_bitmap_t *r); /** -* Empties the bitmap -*/ -void roaring_bitmap_clear(roaring_bitmap_t *ra); + * Empties the bitmap. It will have no auxiliary allocations (so if the bitmap + * was initialized in client memory via roaring_bitmap_init(), then a call to + * roaring_bitmap_clear() would be enough to "free" it) + */ +void roaring_bitmap_clear(roaring_bitmap_t *r); /** - * Convert the bitmap to an array. Write the output to "ans", - * caller is responsible to ensure that there is enough memory - * allocated - * (e.g., ans = malloc(roaring_bitmap_get_cardinality(mybitmap) - * * sizeof(uint32_t)) + * Convert the bitmap to a sorted array, output in `ans`. + * + * Caller is responsible to ensure that there is enough memory allocated, e.g. + * + * ans = malloc(roaring_bitmap_get_cardinality(bitmap) * sizeof(uint32_t)); */ -void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *ra, uint32_t *ans); +void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans); +/** + * Store the bitmap to a bitset. This can be useful for people + * who need the performance and simplicity of a standard bitset. + * We assume that the input bitset is originally empty (does not + * have any set bit). 
+ * + * bitset_t * out = bitset_create(); + * // if the bitset has content in it, call "bitset_clear(out)" + * bool success = roaring_bitmap_to_bitset(mybitmap, out); + * // on failure, success will be false. + * // You can then query the bitset: + * bool is_present = bitset_get(out, 10011 ); + * // you must free the memory: + * bitset_free(out); + * + */ +bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t * bitset); /** - * Convert the bitmap to an array from "offset" by "limit". Write the output to "ans". - * so, you can get data in paging. - * caller is responsible to ensure that there is enough memory - * allocated - * (e.g., ans = malloc(roaring_bitmap_get_cardinality(limit) - * * sizeof(uint32_t)) + * Convert the bitmap to a sorted array from `offset` by `limit`, output in `ans`. + * + * Caller is responsible to ensure that there is enough memory allocated, e.g. + * + * ans = malloc(roaring_bitmap_get_cardinality(limit) * sizeof(uint32_t)); + * * Return false in case of failure (e.g., insufficient memory) */ -bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *ra, size_t offset, size_t limit, uint32_t *ans); +bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, + size_t offset, size_t limit, + uint32_t *ans); /** - * Remove run-length encoding even when it is more space efficient - * return whether a change was applied + * Remove run-length encoding even when it is more space efficient. + * Return whether a change was applied. */ bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r); -/** convert array and bitmap containers to run containers when it is more - * efficient; - * also convert from run containers when more space efficient. Returns - * true if the result has at least one run container. - * Additional savings might be possible by calling shrinkToFit(). 
+/** + * Convert array and bitmap containers to run containers when it is more + * efficient; also convert from run containers when more space efficient. + * + * Returns true if the result has at least one run container. + * Additional savings might be possible by calling `shrinkToFit()`. */ bool roaring_bitmap_run_optimize(roaring_bitmap_t *r); /** - * If needed, reallocate memory to shrink the memory usage. Returns - * the number of bytes saved. -*/ + * If needed, reallocate memory to shrink the memory usage. + * Returns the number of bytes saved. + */ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); /** -* write the bitmap to an output pointer, this output buffer should refer to -* at least roaring_bitmap_size_in_bytes(ra) allocated bytes. -* -* see roaring_bitmap_portable_serialize if you want a format that's compatible -* with Java and Go implementations -* -* this format has the benefit of being sometimes more space efficient than -* roaring_bitmap_portable_serialize -* e.g., when the data is sparse. -* -* Returns how many bytes were written which should be -* roaring_bitmap_size_in_bytes(ra). -*/ -size_t roaring_bitmap_serialize(const roaring_bitmap_t *ra, char *buf); + * Write the bitmap to an output pointer, this output buffer should refer to + * at least `roaring_bitmap_size_in_bytes(r)` allocated bytes. + * + * See `roaring_bitmap_portable_serialize()` if you want a format that's + * compatible with Java and Go implementations. This format can sometimes be + * more space efficient than the portable form, e.g. when the data is sparse. + * + * Returns how many bytes written, should be `roaring_bitmap_size_in_bytes(r)`. + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. 
+ */ +size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf); -/** use with roaring_bitmap_serialize -* see roaring_bitmap_portable_deserialize if you want a format that's -* compatible with Java and Go implementations -*/ +/** + * Use with `roaring_bitmap_serialize()`. + * + * (See `roaring_bitmap_portable_deserialize()` if you want a format that's + * compatible with Java and Go implementations). + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. + */ roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf); +/** + * Use with `roaring_bitmap_serialize()`. + * + * (See `roaring_bitmap_portable_deserialize_safe()` if you want a format that's + * compatible with Java and Go implementations). + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. + * + * The difference with `roaring_bitmap_deserialize()` is that this function checks that the input buffer + * is a valid bitmap. If the buffer is too small, NULL is returned. + */ +roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf, size_t maxbytes); + /** * How many bytes are required to serialize this bitmap (NOT compatible * with Java and Go versions) */ -size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *ra); +size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r); /** - * read a bitmap from a serialized version. This is meant to be compatible with - * the Java and Go versions. See format specification at - * https://github.com/RoaringBitmap/RoaringFormatSpec - * In case of failure, a null pointer is returned. + * Read bitmap from a serialized buffer. + * In case of failure, NULL is returned. 
+ * * This function is unsafe in the sense that if there is no valid serialized - * bitmap at the pointer, then many bytes could be read, possibly causing a buffer - * overflow. For a safer approach, - * call roaring_bitmap_portable_deserialize_safe. + * bitmap at the pointer, then many bytes could be read, possibly causing a + * buffer overflow. See also roaring_bitmap_portable_deserialize_safe(). + * + * This is meant to be compatible with the Java and Go versions: + * https://github.com/RoaringBitmap/RoaringFormatSpec +* + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); /** - * read a bitmap from a serialized version in a safe manner (reading up to maxbytes). - * This is meant to be compatible with - * the Java and Go versions. See format specification at + * Read bitmap from a serialized buffer safely (reading up to maxbytes). + * In case of failure, NULL is returned. + * + * This is meant to be compatible with the Java and Go versions: + * https://github.com/RoaringBitmap/RoaringFormatSpec + * + * The function itself is safe in the sense that it will not cause buffer overflows. + * However, for correct operations, it is assumed that the bitmap read was once + * serialized from a valid bitmap (i.e., it follows the format specification). + * If you provided an incorrect input (garbage), then the bitmap read may not be in + * a valid state and following operations may not lead to sensible results. + * In particular, the serialized array containers need to be in sorted order, and the + * run containers should be in sorted non-overlapping order. This is is guaranteed to + * happen when serializing an existing bitmap, but not for random inputs. + * + * This function is endian-sensitive. 
If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. + */ +roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, + size_t maxbytes); + +/** + * Read bitmap from a serialized buffer. + * In case of failure, NULL is returned. + * + * Bitmap returned by this function can be used in all readonly contexts. + * Bitmap must be freed as usual, by calling roaring_bitmap_free(). + * Underlying buffer must not be freed or modified while it backs any bitmaps. + * + * The function is unsafe in the following ways: + * 1) It may execute unaligned memory accesses. + * 2) A buffer overflow may occur if buf does not point to a valid serialized + * bitmap. + * + * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec - * In case of failure, a null pointer is returned. + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ -roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); +roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf); /** * Check how many bytes would be read (up to maxbytes) at this pointer if there * is a bitmap, returns zero if there is no valid bitmap. - * This is meant to be compatible with - * the Java and Go versions. See format specification at + * + * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec */ -size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes); - +size_t roaring_bitmap_portable_deserialize_size(const char *buf, + size_t maxbytes); /** - * How many bytes are required to serialize this bitmap (meant to be compatible - * with Java and Go versions). 
See format specification at + * How many bytes are required to serialize this bitmap. + * + * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec */ -size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra); +size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r); /** - * write a bitmap to a char buffer. The output buffer should refer to at least - * roaring_bitmap_portable_size_in_bytes(ra) bytes of allocated memory. - * This is meant to be compatible with - * the - * Java and Go versions. Returns how many bytes were written which should be - * roaring_bitmap_portable_size_in_bytes(ra). See format specification at + * Write a bitmap to a char buffer. The output buffer should refer to at least + * `roaring_bitmap_portable_size_in_bytes(r)` bytes of allocated memory. + * + * Returns how many bytes were written which should match + * `roaring_bitmap_portable_size_in_bytes(r)`. + * + * This is meant to be compatible with the Java and Go versions: * https://github.com/RoaringBitmap/RoaringFormatSpec + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ -size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra, char *buf); +size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf); /* * "Frozen" serialization format imitates memory layout of roaring_bitmap_t. @@ -6939,66 +1642,71 @@ size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra, char *buf); /** * Returns number of bytes required to serialize bitmap using frozen format. */ -size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *ra); +size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r); /** * Serializes bitmap using frozen format. * Buffer size must be at least roaring_bitmap_frozen_size_in_bytes(). 
+ * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ -void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *ra, char *buf); +void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf); /** * Creates constant bitmap that is a view of a given buffer. - * Buffer must contain data previously written by roaring_bitmap_frozen_serialize(), - * and additionally its beginning must be aligned by 32 bytes. - * Length must be equal exactly to roaring_bitmap_frozen_size_in_bytes(). - * - * On error, NULL is returned. + * Buffer data should have been written by `roaring_bitmap_frozen_serialize()` + * Its beginning must also be aligned by 32 bytes. + * Length must be equal exactly to `roaring_bitmap_frozen_size_in_bytes()`. + * In case of failure, NULL is returned. * * Bitmap returned by this function can be used in all readonly contexts. * Bitmap must be freed as usual, by calling roaring_bitmap_free(). * Underlying buffer must not be freed or modified while it backs any bitmaps. + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a mainframe IBM s390x), + * the data format is going to be big-endian and not compatible with little-endian systems. */ -const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf, size_t length); - +const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf, + size_t length); /** * Iterate over the bitmap elements. The function iterator is called once for - * all the values with ptr (can be NULL) as the second parameter of each call. + * all the values with ptr (can be NULL) as the second parameter of each call. + * + * `roaring_iterator` is simply a pointer to a function that returns bool + * (true means that the iteration should continue while false means that it + * should stop), and takes (uint32_t,void*) as inputs. 
* - * roaring_iterator is simply a pointer to a function that returns bool - * (true means that the iteration should continue while false means that it - * should stop), - * and takes (uint32_t,void*) as inputs. + * Returns true if the roaring_iterator returned true throughout (so that all + * data points were necessarily visited). * - * Returns true if the roaring_iterator returned true throughout (so that - * all data points were necessarily visited). + * Iteration is ordered: from the smallest to the largest elements. */ -bool roaring_iterate(const roaring_bitmap_t *ra, roaring_iterator iterator, +bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator, void *ptr); -bool roaring_iterate64(const roaring_bitmap_t *ra, roaring_iterator64 iterator, +bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator, uint64_t high_bits, void *ptr); /** * Return true if the two bitmaps contain the same elements. */ -bool roaring_bitmap_equals(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2); +bool roaring_bitmap_equals(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** - * Return true if all the elements of ra1 are also in ra2. + * Return true if all the elements of r1 are also in r2. */ -bool roaring_bitmap_is_subset(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2); +bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** - * Return true if all the elements of ra1 are also in ra2 and ra2 is strictly - * greater - * than ra1. + * Return true if all the elements of r1 are also in r2, and r2 is strictly + * greater than r1. */ -bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *ra1, - const roaring_bitmap_t *ra2); +bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** * (For expert users who seek high performance.) 
@@ -7007,65 +1715,66 @@ bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *ra1, * responsible for memory management. * * The lazy version defers some computations such as the maintenance of the - * cardinality counts. Thus you need - * to call roaring_bitmap_repair_after_lazy after executing "lazy" computations. + * cardinality counts. Thus you must call `roaring_bitmap_repair_after_lazy()` + * after executing "lazy" computations. + * * It is safe to repeatedly call roaring_bitmap_lazy_or_inplace on the result. - * The bitsetconversion conversion is a flag which determines - * whether container-container operations force a bitset conversion. - **/ -roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, + * + * `bitsetconversion` is a flag which determines whether container-container + * operations force a bitset conversion. + */ +roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2, const bool bitsetconversion); /** * (For expert users who seek high performance.) - * Inplace version of roaring_bitmap_lazy_or, modifies x1 - * The bitsetconversion conversion is a flag which determines - * whether container-container operations force a bitset conversion. + * + * Inplace version of roaring_bitmap_lazy_or, modifies r1. + * + * `bitsetconversion` is a flag which determines whether container-container + * operations force a bitset conversion. */ -void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2, +void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *r1, + const roaring_bitmap_t *r2, const bool bitsetconversion); /** * (For expert users who seek high performance.) * - * Execute maintenance operations on a bitmap created from - * roaring_bitmap_lazy_or - * or modified with roaring_bitmap_lazy_or_inplace. + * Execute maintenance on a bitmap created from `roaring_bitmap_lazy_or()` + * or modified with `roaring_bitmap_lazy_or_inplace()`. 
*/ -void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *x1); +void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r1); /** * Computes the symmetric difference between two bitmaps and returns new bitmap. - *The caller is - * responsible for memory management. + * The caller is responsible for memory management. * * The lazy version defers some computations such as the maintenance of the - * cardinality counts. Thus you need - * to call roaring_bitmap_repair_after_lazy after executing "lazy" computations. - * It is safe to repeatedly call roaring_bitmap_lazy_xor_inplace on the result. + * cardinality counts. Thus you must call `roaring_bitmap_repair_after_lazy()` + * after executing "lazy" computations. * + * It is safe to repeatedly call `roaring_bitmap_lazy_xor_inplace()` on + * the result. */ -roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); +roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** * (For expert users who seek high performance.) - * Inplace version of roaring_bitmap_lazy_xor, modifies x1. x1 != x2 * + * Inplace version of roaring_bitmap_lazy_xor, modifies r1. r1 != r2 */ -void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1, - const roaring_bitmap_t *x2); +void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *r1, + const roaring_bitmap_t *r2); /** - * compute the negation of the roaring bitmap within a specified - * interval: [range_start, range_end). The number of negated values is - * range_end - range_start. + * Compute the negation of the bitmap in the interval [range_start, range_end). + * The number of negated values is range_end - range_start. * Areas outside the range are passed through unchanged. 
*/ - -roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, +roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1, uint64_t range_start, uint64_t range_end); /** @@ -7074,49 +1783,74 @@ roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, * range_end - range_start. * Areas outside the range are passed through unchanged. */ - -void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, +void roaring_bitmap_flip_inplace(roaring_bitmap_t *r1, uint64_t range_start, uint64_t range_end); /** + * Selects the element at index 'rank' where the smallest element is at index 0. * If the size of the roaring bitmap is strictly greater than rank, then this - function returns true and set element to the element of given rank. - Otherwise, it returns false. + * function returns true and sets element to the element of given rank. + * Otherwise, it returns false. */ -bool roaring_bitmap_select(const roaring_bitmap_t *ra, uint32_t rank, +bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank, uint32_t *element); + /** -* roaring_bitmap_rank returns the number of integers that are smaller or equal -* to x. -*/ -uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x); + * roaring_bitmap_rank returns the number of integers that are smaller or equal + * to x. Thus if x is the first element, this function will return 1. If + * x is smaller than the smallest element, this function will return 0. + * + * The indexing convention differs between roaring_bitmap_select and + * roaring_bitmap_rank: roaring_bitmap_select refers to the smallest value + * as having index 0, whereas roaring_bitmap_rank returns 1 when ranking + * the smallest value. + */ +uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x); /** -* roaring_bitmap_smallest returns the smallest value in the set. -* Returns UINT32_MAX if the set is empty. 
-*/ -uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm); + * Returns the index of x in the given roaring bitmap. + * If the roaring bitmap doesn't contain x , this function will return -1. + * The difference with rank function is that this function will return -1 when x + * is not the element of roaring bitmap, but the rank function will return a + * non-negative number. + */ +int64_t roaring_bitmap_get_index(const roaring_bitmap_t *r, uint32_t x); /** -* roaring_bitmap_smallest returns the greatest value in the set. -* Returns 0 if the set is empty. -*/ -uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm); + * Returns the smallest value in the set, or UINT32_MAX if the set is empty. + */ +uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *r); /** -* (For advanced users.) -* Collect statistics about the bitmap, see roaring_types.h for -* a description of roaring_statistics_t -*/ -void roaring_bitmap_statistics(const roaring_bitmap_t *ra, + * Returns the greatest value in the set, or 0 if the set is empty. + */ +uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r); + +/** + * (For advanced users.) + * + * Collect statistics about the bitmap, see roaring_types.h for + * a description of roaring_statistics_t + */ +void roaring_bitmap_statistics(const roaring_bitmap_t *r, roaring_statistics_t *stat); +/** + * Perform internal consistency checks. Returns true if the bitmap is consistent. + * + * Note that some operations intentionally leave bitmaps in an inconsistent state temporarily, + * for example, `roaring_bitmap_lazy_*` functions, until `roaring_bitmap_repair_after_lazy` is called. + * + * If reason is non-null, it will be set to a string describing the first inconsistency found if any. + */ +bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, const char **reason); + /********************* * What follows is code use to iterate through values in a roaring bitmap -roaring_bitmap_t *ra =... 
-roaring_uint32_iterator_t i; -roaring_create_iterator(ra, &i); +roaring_bitmap_t *r =... +roaring_uint32_iterator_t i; +roaring_create_iterator(r, &i); while(i.has_value) { printf("value = %d\n", i.current_value); roaring_advance_uint32_iterator(&i); @@ -7130,79 +1864,81 @@ typedef struct roaring_uint32_iterator_s { const roaring_bitmap_t *parent; // owner int32_t container_index; // point to the current container index int32_t in_container_index; // for bitset and array container, this is out - // index + // index int32_t run_index; // for run container, this points at the run uint32_t current_value; bool has_value; - const void - *container; // should be: - // parent->high_low_container.containers[container_index]; + const ROARING_CONTAINER_T + *container; // should be: + // parent->high_low_container.containers[container_index]; uint8_t typecode; // should be: - // parent->high_low_container.typecodes[container_index]; + // parent->high_low_container.typecodes[container_index]; uint32_t highbits; // should be: - // parent->high_low_container.keys[container_index]) << - // 16; + // parent->high_low_container.keys[container_index]) << + // 16; } roaring_uint32_iterator_t; /** -* Initialize an iterator object that can be used to iterate through the -* values. If there is a value, then this iterator points to the first value -* and it->has_value is true. The value is in it->current_value. -*/ -void roaring_init_iterator(const roaring_bitmap_t *ra, + * Initialize an iterator object that can be used to iterate through the + * values. If there is a value, then this iterator points to the first value + * and `it->has_value` is true. The value is in `it->current_value`. + */ +void roaring_init_iterator(const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit); /** -* Initialize an iterator object that can be used to iterate through the -* values. If there is a value, then this iterator points to the last value -* and it->has_value is true. 
The value is in it->current_value. -*/ -void roaring_init_iterator_last(const roaring_bitmap_t *ra, + * Initialize an iterator object that can be used to iterate through the + * values. If there is a value, then this iterator points to the last value + * and `it->has_value` is true. The value is in `it->current_value`. + */ +void roaring_init_iterator_last(const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit); /** -* Create an iterator object that can be used to iterate through the -* values. Caller is responsible for calling roaring_free_iterator. -* The iterator is initialized. If there is a value, then this iterator -* points to the first value and it->has_value is true. -* The value is in it->current_value. -* -* This function calls roaring_init_iterator. -*/ -roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *ra); + * Create an iterator object that can be used to iterate through the values. + * Caller is responsible for calling `roaring_free_iterator()`. + * + * The iterator is initialized (this function calls `roaring_init_iterator()`) + * If there is a value, then this iterator points to the first value and + * `it->has_value` is true. The value is in `it->current_value`. + */ +roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r); /** -* Advance the iterator. If there is a new value, then it->has_value is true. -* The new value is in it->current_value. Values are traversed in increasing -* orders. For convenience, returns it->has_value. +* Advance the iterator. If there is a new value, then `it->has_value` is true. +* The new value is in `it->current_value`. Values are traversed in increasing +* orders. For convenience, returns `it->has_value`. */ bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it); /** -* Decrement the iterator. If there is a new value, then it->has_value is true. -* The new value is in it->current_value. Values are traversed in decreasing -* orders. 
For convenience, returns it->has_value. +* Decrement the iterator. If there's a new value, then `it->has_value` is true. +* The new value is in `it->current_value`. Values are traversed in decreasing +* order. For convenience, returns `it->has_value`. */ bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it); /** -* Move the iterator to the first value >= val. If there is a such a value, then it->has_value is true. -* The new value is in it->current_value. For convenience, returns it->has_value. -*/ -bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) ; + * Move the iterator to the first value >= `val`. If there is a such a value, + * then `it->has_value` is true. The new value is in `it->current_value`. + * For convenience, returns `it->has_value`. + */ +bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, + uint32_t val); + /** -* Creates a copy of an iterator. -* Caller must free it. -*/ + * Creates a copy of an iterator. + * Caller must free it. 
+ */ roaring_uint32_iterator_t *roaring_copy_uint32_iterator( - const roaring_uint32_iterator_t *it); + const roaring_uint32_iterator_t *it); /** -* Free memory following roaring_create_iterator -*/ + * Free memory following `roaring_create_iterator()` + */ void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it); /* @@ -7215,11 +1951,70 @@ void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it); * - first value is copied from ${it}->current_value * - after function returns, iterator is positioned at the next element */ -uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count); +uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, + uint32_t* buf, uint32_t count); #ifdef __cplusplus -} +} } } // extern "C" { namespace roaring { namespace api { #endif +#endif /* ROARING_H */ + +#ifdef __cplusplus +/** + * Best practices for C++ headers is to avoid polluting global scope. + * But for C compatibility when just `roaring.h` is included building as + * C++, default to global access for the C public API. + * + * BUT when `roaring.hh` is included instead, it sets this flag. That way + * explicit namespacing must be used to get the C functions. + * + * This is outside the include guard so that if you include BOTH headers, + * the order won't matter; you still get the global definitions. 
+ */ + #if !defined(ROARING_API_NOT_IN_GLOBAL_NAMESPACE) + using namespace ::roaring::api; + #endif #endif /* end file include/roaring/roaring.h */ +/* begin file include/roaring/memory.h */ +#ifndef INCLUDE_ROARING_MEMORY_H_ +#define INCLUDE_ROARING_MEMORY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include // for size_t + +typedef void* (*roaring_malloc_p)(size_t); +typedef void* (*roaring_realloc_p)(void*, size_t); +typedef void* (*roaring_calloc_p)(size_t, size_t); +typedef void (*roaring_free_p)(void*); +typedef void* (*roaring_aligned_malloc_p)(size_t, size_t); +typedef void (*roaring_aligned_free_p)(void*); + +typedef struct roaring_memory_s { + roaring_malloc_p malloc; + roaring_realloc_p realloc; + roaring_calloc_p calloc; + roaring_free_p free; + roaring_aligned_malloc_p aligned_malloc; + roaring_aligned_free_p aligned_free; +} roaring_memory_t; + +void roaring_init_memory_hook(roaring_memory_t memory_hook); + +void* roaring_malloc(size_t); +void* roaring_realloc(void*, size_t); +void* roaring_calloc(size_t, size_t); +void roaring_free(void*); +void* roaring_aligned_malloc(size_t, size_t); +void roaring_aligned_free(void*); + +#ifdef __cplusplus +} +#endif + +#endif // INCLUDE_ROARING_MEMORY_H_ +/* end file include/roaring/memory.h */ diff --git a/roaring_buffer_reader.c b/roaring_buffer_reader.c index 16b6a08..4456f22 100644 --- a/roaring_buffer_reader.c +++ b/roaring_buffer_reader.c @@ -149,7 +149,7 @@ static void *rb_get_container_at_index(const roaring_buffer_t *rb, uint16_t i, bitset_container_read(thiscard, c, buf); answer = c; - *typecode = BITSET_CONTAINER_TYPE_CODE; + *typecode = BITSET_CONTAINER_TYPE; } else if (isrun) { // we check that the read is allowed readbytes += sizeof(uint16_t); @@ -174,7 +174,7 @@ static void *rb_get_container_at_index(const roaring_buffer_t *rb, uint16_t i, } run_container_read(thiscard, c, buf); answer = c; - *typecode = RUN_CONTAINER_TYPE_CODE; + *typecode = RUN_CONTAINER_TYPE; } else { // we check that the 
read is allowed size_t containersize = thiscard * sizeof(uint16_t); @@ -192,7 +192,7 @@ static void *rb_get_container_at_index(const roaring_buffer_t *rb, uint16_t i, } array_container_read(thiscard, c, buf); answer = c; - *typecode = ARRAY_CONTAINER_TYPE_CODE; + *typecode = ARRAY_CONTAINER_TYPE; } return answer; diff --git a/roaringbitmap.c b/roaringbitmap.c index 7022ed7..9729b76 100644 --- a/roaringbitmap.c +++ b/roaringbitmap.c @@ -23,7 +23,22 @@ static const struct config_enum_entry output_format_options[] = static int rbitmap_output_format; /* output format */ +void * pg_aligned_malloc(size_t alignment, size_t size); +void pg_aligned_free(void *memblock); +void* pg_realloc(void* p, size_t new_sz); +void* pg_calloc(size_t n_elements, size_t element_size); +void pg_free(void* p); void _PG_init(void); + +static roaring_memory_t pg_global_memory_hook = { + .malloc = palloc, + .realloc = pg_realloc, + .calloc = pg_calloc, + .free = pg_free, + .aligned_malloc = pg_aligned_malloc, + .aligned_free = pg_aligned_free, +}; + /* * Module load callback */ @@ -42,8 +57,45 @@ _PG_init(void) NULL, NULL, NULL); + roaring_init_memory_hook(pg_global_memory_hook); +} + +void * +pg_aligned_malloc(size_t alignment, size_t size) { + void *p; + void *porg; + assert(alignment <= 256); + porg = palloc(size + alignment); + p = (void *)((((uint64)porg + alignment) / alignment) * alignment); + *((unsigned char *)p-1) = (unsigned char)((uint64)p - (uint64)porg); + return p; +} + +void +pg_aligned_free(void *memblock) { + void *porg; + if (memblock == NULL) + return; + porg = (void *)((uint64)memblock - *((unsigned char *)memblock-1)); + if (porg == memblock) + porg = (void *)((uint64)porg - 256); + pfree(porg); +} + +void* +pg_realloc(void* p, size_t new_sz) { + return p==NULL ? palloc(new_sz) : repalloc(p,new_sz); } +void* +pg_calloc(size_t n_elements, size_t element_size) { + return palloc0(n_elements*element_size); +} + +void +pg_free(void* p) { + return p==NULL ? 
free(p) : pfree(p); +} bool ArrayContainsNulls(ArrayType *array) { diff --git a/roaringbitmap.h b/roaringbitmap.h index 6cc3ab5..d6c78bb 100644 --- a/roaringbitmap.h +++ b/roaringbitmap.h @@ -46,56 +46,6 @@ bool ArrayContainsNulls(ArrayType *array); #define ARRISEMPTY(x) (ARRNELEMS(x) == 0) -/* Malloc a buffer of size + alignment bytes and returns the aligned part. -The offset between the real pointer and returned value was stored in p[-1]. -*/ -static inline void *pg_aligned_malloc(size_t alignment, size_t size) { - void *p; - void *porg; - assert(alignment <= 256); - porg = palloc(size + alignment); - p = (void *)((((uint64)porg + alignment) / alignment) * alignment); - *((unsigned char *)p-1) = (unsigned char)((uint64)p - (uint64)porg); - return p; -} - -static inline void pg_aligned_free(void *memblock) { - void *porg; - if (memblock == NULL) - return; - porg = (void *)((uint64)memblock - *((unsigned char *)memblock-1)); - if (porg == memblock) - porg = (void *)((uint64)porg - 256); - pfree(porg); -} - -/* - * Redefine standard memory allocation interface to pgsql's one. -*/ -#ifdef malloc -#undef malloc -#endif -#define malloc(a) palloc(a) - -#ifdef calloc -#undef calloc -#endif -#define calloc(a, b) palloc0((a) * (b)) - -#ifdef realloc -#undef realloc -#endif -#define realloc(a, b) ((a)==NULL ? palloc(b) : repalloc((a),(b))) - -#ifdef free -#undef free -#endif -#define free(a) ((a)==NULL ? free(a) : pfree(a)) - -#define roaring_bitmap_aligned_malloc(a,b) pg_aligned_malloc((a),(b)) -#define roaring_bitmap_aligned_free(a) pg_aligned_free(a) - -/* must include "roaring.c" after redefine malloc functions */ #include "roaring.c" #include "roaring_buffer_reader.c"