From e713354be50d00f51d3055f809305cafa9109897 Mon Sep 17 00:00:00 2001 From: Soerian Lieve Date: Sat, 30 Dec 2023 12:47:04 +0100 Subject: [PATCH 01/11] Add an Adaptive Radix Trie (ART) This will be used to store the high 48 bits of 64 bit roaring bitmap entries. For more details, see include/roaring/art/art.h --- include/roaring/art/art.h | 180 ++++ src/CMakeLists.txt | 1 + src/art/art.c | 1622 +++++++++++++++++++++++++++++++++++++ tests/CMakeLists.txt | 1 + tests/art_unit.cpp | 502 ++++++++++++ 5 files changed, 2306 insertions(+) create mode 100644 include/roaring/art/art.h create mode 100644 src/art/art.c create mode 100644 tests/art_unit.cpp diff --git a/include/roaring/art/art.h b/include/roaring/art/art.h new file mode 100644 index 000000000..94130c61f --- /dev/null +++ b/include/roaring/art/art.h @@ -0,0 +1,180 @@ +#ifndef ART_ART_H +#define ART_ART_H + +#include +#include +#include + +/* + * This file contains an implementation of an Adaptive Radix Tree as described + * in https://db.in.tum.de/~leis/papers/ART.pdf. + * + * The ART contains the keys in _byte lexographical_ order. + * + * Other features: + * * Fixed 48 bit key length: all keys are assumed to be be 48 bits in size. + * This allows us to put the key and key prefixes directly in nodes, reducing + * indirection at no additional memory overhead. + * * Key compression: the only inner nodes created are at points where key + * chunks _differ_. This means that if there are two entries with different + * high 48 bits, then there is only one inner node containing the common key + * prefix, and two leaves. + * * Intrusive leaves: the leaf struct is included in user values. This removes + * a layer of indirection. + */ + +// Fixed length of keys in the ART. All keys are assumed to be of this length. +#define ART_KEY_BYTES 6 + +#ifdef __cplusplus +extern "C" { +namespace roaring { +namespace internal { +#endif + +typedef uint8_t art_key_chunk_t; +typedef struct art_node_s art_node_t; + +/** + * Wrapper to allow an empty tree. + */ +typedef struct art_s { + art_node_t *root; +} art_t; + +/** + * Values inserted into the tree have to be cast-able to art_val_t. This + * improves performance by reducing indirection. + * + * NOTE: Value pointers must be unique! This is because each value struct + * contains the key corresponding to the value. + */ +typedef struct art_val_s { + art_key_chunk_t _pad[ART_KEY_BYTES]; +} art_val_t; + +/** + * Compares two keys, returns their relative order: + * * Key 1 < key 2: returns a negative value + * * Key 1 == key 2: returns 0 + * * Key 1 > key 2: returns a positive value + */ +int art_compare_keys(const art_key_chunk_t key1[], + const art_key_chunk_t key2[]); + +/** + * Inserts the given key and value. + */ +void art_insert(art_t *art, const art_key_chunk_t *key, art_val_t *val); + +/** + * Returns the value erased, NULL if not found. + */ +art_val_t *art_erase(art_t *art, const art_key_chunk_t *key); + +/** + * Returns the value associated with the given key, NULL if not found. + */ +art_val_t *art_find(const art_t *art, const art_key_chunk_t *key); + +/** + * Returns true if the ART is empty. + */ +bool art_is_empty(const art_t *art); + +/** + * Frees the nodes of the ART except the values, which the user is expected to + * free. + */ +void art_free(art_t *art); + +/** + * Returns the size in bytes of the ART. Includes size of pointers to values, + * but not the values themselves. + */ +size_t art_size_in_bytes(const art_t *art); + +/** + * Prints the ART using printf, useful for debugging. 
+ */ +void art_printf(const art_t *art); + +/** + * ART-internal iterator bookkeeping. Users should treat this as an opaque type. + */ +typedef struct art_iterator_frame_s { + art_node_t *node; + uint8_t index_in_node; +} art_iterator_frame_t; + +/** + * Users should only access `key` and `value` in iterators. The iterator is + * valid when `value != NULL`. + */ +typedef struct art_iterator_s { + art_key_chunk_t key[ART_KEY_BYTES]; + art_val_t *value; + + uint8_t depth; // Key depth + uint8_t frame; // Node depth + art_iterator_frame_t frames[ART_KEY_BYTES]; +} art_iterator_t; + +/** + * Creates an iterator. Must be initialized through `art_init_iterator` first. + */ +art_iterator_t art_create_iterator(); + +/** + * Moves the iterator to the first or last entry in the ART, depending on + * `first`. + */ +void art_init_iterator(const art_t *art, art_iterator_t *iterator, bool first); + +/** + * Returns an initialized iterator positioned at a key equal to or greater than + * the given key, if it exists. + */ +art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key); + +/** + * Returns an initialized iterator positioned at a key greater than the given + * key, if it exists. + */ +art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key); + +/** + * The following iterator movement functions return true if a new entry was + * encountered. + */ +bool art_iterator_move(art_iterator_t *iterator, bool forward); +bool art_iterator_next(art_iterator_t *iterator); +bool art_iterator_prev(art_iterator_t *iterator); + +/** + * Moves the iterator forward to a key equal to or greater than the given key. + * Assumes the given key is greater or equal to the current position of the + * iterator. + */ +bool art_iterator_lower_bound(art_iterator_t *iterator, + const art_key_chunk_t *key); + +/** + * Insert the value and positions the iterator at the key. + */ +void art_iterator_insert(art_t *art, art_iterator_t *iterator, + const art_key_chunk_t *key, art_val_t *val); + +/** + * Erase the value pointed at by the iterator. Moves the iterator to the next + * leaf. Returns the value erased or NULL if nothing was erased. + */ +art_val_t *art_iterator_erase(art_t *art, art_iterator_t *iterator); + +#ifdef __cplusplus +} // extern "C" +} // namespace roaring +} // namespace internal +#endif + +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 16007664d..1f091eddf 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,6 +18,7 @@ set(ROARING_SRC isadetection.c array_util.c bitset_util.c + art/art.c bitset.c containers/array.c containers/bitset.c diff --git a/src/art/art.c b/src/art/art.c new file mode 100644 index 000000000..5379823a8 --- /dev/null +++ b/src/art/art.c @@ -0,0 +1,1622 @@ +#include +#include +#include +#include +#include + +#define ART_NODE4_TYPE 0 +#define ART_NODE16_TYPE 1 +#define ART_NODE48_TYPE 2 +#define ART_NODE256_TYPE 3 +#define ART_NUM_TYPES 4 + +// Node48 placeholder value to indicate no child is present at this key index. +#define ART_NODE48_EMPTY_VAL 48 + +// We use the least significant bit of node pointers to indicate whether a node +// is a leaf or an inner node. This is never surfaced to the user. +// +// Using pointer tagging to indicate leaves not only saves a bit of memory by +// sparing the typecode, but also allows us to use an intrusive leaf struct. +// Using an intrusive leaf struct leaves leaf allocation up to the user. 
Upon +// deallocation of the ART, we know not to free the leaves without having to +// dereference the leaf pointers. +// +// All internal operations on leaves should use CAST_LEAF before using the leaf. +// The only places that use SET_LEAF are locations where a field is directly +// assigned to a leaf pointer. After using SET_LEAF, the leaf should be treated +// as a node of unknown type. +#define IS_LEAF(p) (((uintptr_t)p & 1)) +#define SET_LEAF(p) ((art_node_t *)((uintptr_t)p | 1)) +#define CAST_LEAF(p) ((art_leaf_t *)((void *)((uintptr_t)p & ~1))) + +#ifdef __cplusplus +extern "C" { +namespace roaring { +namespace internal { +#endif + +typedef uint8_t art_typecode_t; + +// Base node struct. +typedef struct art_node_s { +} art_node_t; + +// Should be kept in sync with art_val_t. +typedef struct art_leaf_s { + art_key_chunk_t key[ART_KEY_BYTES]; +} art_leaf_t; + +// Inner node, with prefix. +// +// We use a fixed-length array as a pointer would be larger than the array. +typedef struct art_inner_node_s { + art_node_t base; + art_typecode_t typecode; + uint8_t prefix_size; + uint8_t prefix[ART_KEY_BYTES - 1]; +} art_inner_node_t; + +// Inner node types. + +// Node4: key[i] corresponds with children[i]. Keys are sorted. +typedef struct art_node4_s { + art_inner_node_t base; + uint8_t count; + uint8_t keys[4]; + art_node_t *children[4]; +} art_node4_t; + +// Node16: key[i] corresponds with children[i]. Keys are sorted. +typedef struct art_node16_s { + art_inner_node_t base; + uint8_t count; + uint8_t keys[16]; + art_node_t *children[16]; +} art_node16_t; + +// Node48: key[i] corresponds with children[key[i]] if key[i] != +// ART_NODE48_EMPTY_VAL. Keys are naturally sorted due to direct indexing. +typedef struct art_node48_s { + art_inner_node_t base; + uint8_t count; + uint8_t keys[256]; + art_node_t *children[48]; +} art_node48_t; + +// Node256: children[i] is directly indexed by key chunk. A child is present if +// children[i] != NULL. +typedef struct art_node256_s { + art_inner_node_t base; + uint16_t count; + art_node_t *children[256]; +} art_node256_t; + +// Helper struct to refer to a child within a node at a specific index. 
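+// The meaning of `index` depends on the node type: for Node4 and Node16 it is
+// the position in the sorted `keys` array, while for Node48 and Node256 it is
+// the key chunk itself. Callers should treat it as opaque and only pass it
+// back to the same node's child iteration functions.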
+typedef struct art_indexed_child_s { + art_node_t *child; + art_key_chunk_t key_chunk; + uint8_t index; +} art_indexed_child_t; + +static inline bool art_is_leaf(const art_node_t *node) { return IS_LEAF(node); } + +static void art_leaf_populate(art_leaf_t *leaf, const art_key_chunk_t key[]) { + memcpy(&leaf->key, key, ART_KEY_BYTES); +} + +static inline uint8_t art_get_type(const art_inner_node_t *node) { + return node->typecode; +} + +static inline void art_init_inner_node(art_inner_node_t *node, + art_typecode_t typecode, + const art_key_chunk_t prefix[], + uint8_t prefix_size) { + node->typecode = typecode; + node->prefix_size = prefix_size; + memcpy(node->prefix, prefix, prefix_size * sizeof(art_key_chunk_t)); +} + +static void art_free_node(art_node_t *node); + +// ===================== Start of node-specific functions ====================== + +static art_node4_t *art_node4_create(const art_key_chunk_t prefix[], + uint8_t prefix_size); +static art_node16_t *art_node16_create(const art_key_chunk_t prefix[], + uint8_t prefix_size); +static art_node48_t *art_node48_create(const art_key_chunk_t prefix[], + uint8_t prefix_size); +static art_node256_t *art_node256_create(const art_key_chunk_t prefix[], + uint8_t prefix_size); + +static art_node_t *art_node4_insert(art_node4_t *node, art_node_t *child, + uint8_t key); +static art_node_t *art_node16_insert(art_node16_t *node, art_node_t *child, + uint8_t key); +static art_node_t *art_node48_insert(art_node48_t *node, art_node_t *child, + uint8_t key); +static art_node_t *art_node256_insert(art_node256_t *node, art_node_t *child, + uint8_t key); + +static art_node4_t *art_node4_create(const art_key_chunk_t prefix[], + uint8_t prefix_size) { + art_node4_t *node = roaring_malloc(sizeof(art_node4_t)); + art_init_inner_node((art_inner_node_t *)node, ART_NODE4_TYPE, prefix, + prefix_size); + node->count = 0; + return node; +} + +static void art_free_node4(art_node4_t *node) { + for (size_t i = 0; i < node->count; ++i) { + art_free_node(node->children[i]); + } + roaring_free(node); +} + +static inline art_node_t *art_node4_find_child(const art_node4_t *node, + art_key_chunk_t key) { + for (size_t i = 0; i < node->count; ++i) { + if (node->keys[i] == key) { + return node->children[i]; + } + } + return NULL; +} + +static art_node_t *art_node4_insert(art_node4_t *node, art_node_t *child, + uint8_t key) { + if (node->count < 4) { + size_t idx = 0; + for (; idx < node->count; ++idx) { + if (node->keys[idx] > key) { + break; + } + } + size_t after = node->count - idx; + // Shift other keys to maintain sorted order. + memmove(node->keys + idx + 1, node->keys + idx, + after * sizeof(art_key_chunk_t)); + memmove(node->children + idx + 1, node->children + idx, + after * sizeof(art_node_t *)); + + node->children[idx] = child; + node->keys[idx] = key; + node->count++; + return (art_node_t *)node; + } + art_node16_t *new_node = + art_node16_create(node->base.prefix, node->base.prefix_size); + // Instead of calling insert, this could be specialized to 2x memcpy and + // setting the count. 
+ for (size_t i = 0; i < 4; ++i) { + art_node16_insert(new_node, node->children[i], node->keys[i]); + } + roaring_free(node); + return art_node16_insert(new_node, child, key); +} + +static inline art_node_t *art_node4_erase(art_node4_t *node, + art_key_chunk_t key_chunk) { + int idx = -1; + for (size_t i = 0; i < node->count; ++i) { + if (node->keys[i] == key_chunk) { + idx = i; + } + } + if (idx == -1) { + return (art_node_t *)node; + } + if (node->count == 2) { + // Only one child remains after erasing, so compress the path by + // removing this node. + art_node_t *remaining_child = node->children[idx ^ 1]; + art_key_chunk_t remaining_child_key = node->keys[idx ^ 1]; + if (!art_is_leaf(remaining_child)) { + // Correct the prefix of the child node. + art_inner_node_t *inner_node = (art_inner_node_t *)remaining_child; + memmove(inner_node->prefix + node->base.prefix_size + 1, + inner_node->prefix, inner_node->prefix_size); + memcpy(inner_node->prefix, node->base.prefix, + node->base.prefix_size); + *(inner_node->prefix + node->base.prefix_size) = + remaining_child_key; + inner_node->prefix_size += node->base.prefix_size + 1; + } + roaring_free(node); + return remaining_child; + } + // Shift other keys to maintain sorted order. + size_t after_next = node->count - idx - 1; + memmove(node->keys + idx, node->keys + idx + 1, + after_next * sizeof(art_key_chunk_t)); + memmove(node->children + idx, node->children + idx + 1, + after_next * sizeof(art_node_t *)); + node->count--; + return (art_node_t *)node; +} + +static inline void art_node4_replace(art_node4_t *node, + art_key_chunk_t key_chunk, + art_node_t *new_child) { + for (size_t i = 0; i < node->count; ++i) { + if (node->keys[i] == key_chunk) { + node->children[i] = new_child; + return; + } + } +} + +static inline art_indexed_child_t art_node4_next_child(const art_node4_t *node, + int index) { + art_indexed_child_t indexed_child; + index++; + if (index >= node->count) { + indexed_child.child = NULL; + return indexed_child; + } + indexed_child.index = index; + indexed_child.key_chunk = node->keys[index]; + indexed_child.child = node->children[index]; + return indexed_child; +} + +static inline art_indexed_child_t art_node4_prev_child(const art_node4_t *node, + int index) { + if (index > node->count) { + index = node->count; + } + index--; + art_indexed_child_t indexed_child; + if (index < 0) { + indexed_child.child = NULL; + return indexed_child; + } + indexed_child.index = index; + indexed_child.key_chunk = node->keys[index]; + indexed_child.child = node->children[index]; + return indexed_child; +} + +static inline art_indexed_child_t art_node4_child_at(const art_node4_t *node, + int index) { + art_indexed_child_t indexed_child; + if (index < 0 || index >= node->count) { + indexed_child.child = NULL; + return indexed_child; + } + indexed_child.index = index; + indexed_child.key_chunk = node->keys[index]; + indexed_child.child = node->children[index]; + return indexed_child; +} + +static inline art_indexed_child_t art_node4_lower_bound( + art_node4_t *node, art_key_chunk_t key_chunk) { + art_indexed_child_t indexed_child; + for (size_t i = 0; i < node->count; ++i) { + if (node->keys[i] >= key_chunk) { + indexed_child.index = i; + indexed_child.key_chunk = node->keys[i]; + indexed_child.child = node->children[i]; + return indexed_child; + } + } + indexed_child.child = NULL; + return indexed_child; +} + +static art_node16_t *art_node16_create(const art_key_chunk_t prefix[], + uint8_t prefix_size) { + art_node16_t *node = 
roaring_malloc(sizeof(art_node16_t)); + art_init_inner_node((art_inner_node_t *)node, ART_NODE16_TYPE, prefix, + prefix_size); + node->count = 0; + return node; +} + +static void art_free_node16(art_node16_t *node) { + for (size_t i = 0; i < node->count; ++i) { + art_free_node(node->children[i]); + } + roaring_free(node); +} + +static inline art_node_t *art_node16_find_child(const art_node16_t *node, + art_key_chunk_t key) { + for (size_t i = 0; i < node->count; ++i) { + if (node->keys[i] == key) { + return node->children[i]; + } + } + return NULL; +} + +static art_node_t *art_node16_insert(art_node16_t *node, art_node_t *child, + uint8_t key) { + if (node->count < 16) { + size_t idx = 0; + for (; idx < node->count; ++idx) { + if (node->keys[idx] > key) { + break; + } + } + size_t after = node->count - idx; + // Shift other keys to maintain sorted order. + memmove(node->keys + idx + 1, node->keys + idx, + after * sizeof(art_key_chunk_t)); + memmove(node->children + idx + 1, node->children + idx, + after * sizeof(art_node_t *)); + + node->children[idx] = child; + node->keys[idx] = key; + node->count++; + return (art_node_t *)node; + } + art_node48_t *new_node = + art_node48_create(node->base.prefix, node->base.prefix_size); + for (size_t i = 0; i < 16; ++i) { + art_node48_insert(new_node, node->children[i], node->keys[i]); + } + roaring_free(node); + return art_node48_insert(new_node, child, key); +} + +static inline art_node_t *art_node16_erase(art_node16_t *node, + uint8_t key_chunk) { + for (size_t i = 0; i < node->count; ++i) { + if (node->keys[i] == key_chunk) { + // Shift other keys to maintain sorted order. + size_t after_next = node->count - i - 1; + memmove(node->keys + i, node->keys + i + 1, + after_next * sizeof(key_chunk)); + memmove(node->children + i, node->children + i + 1, + after_next * sizeof(art_node_t *)); + node->count--; + break; + } + } + if (node->count > 4) { + return (art_node_t *)node; + } + art_node4_t *new_node = + art_node4_create(node->base.prefix, node->base.prefix_size); + // Instead of calling insert, this could be specialized to 2x memcpy and + // setting the count. 
+ for (size_t i = 0; i < 4; ++i) { + art_node4_insert(new_node, node->children[i], node->keys[i]); + } + roaring_free(node); + return (art_node_t *)new_node; +} + +static inline void art_node16_replace(art_node16_t *node, + art_key_chunk_t key_chunk, + art_node_t *new_child) { + for (uint8_t i = 0; i < node->count; ++i) { + if (node->keys[i] == key_chunk) { + node->children[i] = new_child; + return; + } + } +} + +static inline art_indexed_child_t art_node16_next_child( + const art_node16_t *node, int index) { + art_indexed_child_t indexed_child; + index++; + if (index >= node->count) { + indexed_child.child = NULL; + return indexed_child; + } + indexed_child.index = index; + indexed_child.key_chunk = node->keys[index]; + indexed_child.child = node->children[index]; + return indexed_child; +} + +static inline art_indexed_child_t art_node16_prev_child( + const art_node16_t *node, int index) { + if (index > node->count) { + index = node->count; + } + index--; + art_indexed_child_t indexed_child; + if (index < 0) { + indexed_child.child = NULL; + return indexed_child; + } + indexed_child.index = index; + indexed_child.key_chunk = node->keys[index]; + indexed_child.child = node->children[index]; + return indexed_child; +} + +static inline art_indexed_child_t art_node16_child_at(const art_node16_t *node, + int index) { + art_indexed_child_t indexed_child; + if (index < 0 || index >= node->count) { + indexed_child.child = NULL; + return indexed_child; + } + indexed_child.index = index; + indexed_child.key_chunk = node->keys[index]; + indexed_child.child = node->children[index]; + return indexed_child; +} + +static inline art_indexed_child_t art_node16_lower_bound( + art_node16_t *node, art_key_chunk_t key_chunk) { + art_indexed_child_t indexed_child; + for (size_t i = 0; i < node->count; ++i) { + if (node->keys[i] >= key_chunk) { + indexed_child.index = i; + indexed_child.key_chunk = node->keys[i]; + indexed_child.child = node->children[i]; + return indexed_child; + } + } + indexed_child.child = NULL; + return indexed_child; +} + +static art_node48_t *art_node48_create(const art_key_chunk_t prefix[], + uint8_t prefix_size) { + art_node48_t *node = roaring_malloc(sizeof(art_node48_t)); + art_init_inner_node((art_inner_node_t *)node, ART_NODE48_TYPE, prefix, + prefix_size); + node->count = 0; + for (size_t i = 0; i < 256; ++i) { + node->keys[i] = ART_NODE48_EMPTY_VAL; + } + return node; +} + +static void art_free_node48(art_node48_t *node) { + for (size_t i = 0; i < 256; ++i) { + uint8_t val_idx = node->keys[i]; + if (val_idx != ART_NODE48_EMPTY_VAL) { + art_free_node(node->children[val_idx]); + } + } + roaring_free(node); +} + +static inline art_node_t *art_node48_find_child(const art_node48_t *node, + art_key_chunk_t key) { + uint8_t val_idx = node->keys[key]; + if (val_idx != ART_NODE48_EMPTY_VAL) { + return node->children[val_idx]; + } + return NULL; +} + +static art_node_t *art_node48_insert(art_node48_t *node, art_node_t *child, + uint8_t key) { + if (node->count < 48) { + uint8_t val_idx = node->count; + node->keys[key] = val_idx; + node->children[val_idx] = child; + node->count++; + return (art_node_t *)node; + } + art_node256_t *new_node = + art_node256_create(node->base.prefix, node->base.prefix_size); + for (size_t i = 0; i < 256; ++i) { + uint8_t val_idx = node->keys[i]; + if (val_idx != ART_NODE48_EMPTY_VAL) { + art_node256_insert(new_node, node->children[val_idx], i); + } + } + roaring_free(node); + return art_node256_insert(new_node, child, key); +} + +static inline art_node_t 
*art_node48_erase(art_node48_t *node, + uint8_t key_chunk) { + uint8_t val_idx = node->keys[key_chunk]; + if (val_idx == ART_NODE48_EMPTY_VAL) { + return (art_node_t *)node; + } + node->children[val_idx] = NULL; + node->keys[key_chunk] = ART_NODE48_EMPTY_VAL; + node->count--; + if (node->count > 16) { + return (art_node_t *)node; + } + + art_node16_t *new_node = + art_node16_create(node->base.prefix, node->base.prefix_size); + for (size_t i = 0; i < 256; ++i) { + val_idx = node->keys[i]; + if (val_idx != ART_NODE48_EMPTY_VAL) { + art_node16_insert(new_node, node->children[val_idx], i); + } + } + roaring_free(node); + return (art_node_t *)new_node; +} + +static inline void art_node48_replace(art_node48_t *node, + art_key_chunk_t key_chunk, + art_node_t *new_child) { + uint8_t val_idx = node->keys[key_chunk]; + assert(val_idx != ART_NODE48_EMPTY_VAL); + node->children[val_idx] = new_child; +} + +static inline art_indexed_child_t art_node48_next_child( + const art_node48_t *node, int index) { + art_indexed_child_t indexed_child; + index++; + for (size_t i = index; i < 256; ++i) { + if (node->keys[i] != ART_NODE48_EMPTY_VAL) { + indexed_child.child = node->children[node->keys[i]]; + indexed_child.key_chunk = node->keys[i]; + indexed_child.index = i; + return indexed_child; + } + } + indexed_child.child = NULL; + return indexed_child; +} + +static inline art_indexed_child_t art_node48_prev_child( + const art_node48_t *node, int index) { + if (index > 256) { + index = 256; + } + index--; + art_indexed_child_t indexed_child; + for (int i = index; i > 0; --i) { + if (node->keys[i] != ART_NODE48_EMPTY_VAL) { + indexed_child.child = node->children[node->keys[i]]; + indexed_child.key_chunk = node->keys[i]; + indexed_child.index = i; + return indexed_child; + } + } + indexed_child.child = NULL; + return indexed_child; +} + +static inline art_indexed_child_t art_node48_child_at(const art_node48_t *node, + int index) { + art_indexed_child_t indexed_child; + if (index < 0 || index >= 256) { + indexed_child.child = NULL; + return indexed_child; + } + indexed_child.index = index; + indexed_child.key_chunk = node->keys[index]; + indexed_child.child = node->children[node->keys[index]]; + return indexed_child; +} + +static inline art_indexed_child_t art_node48_lower_bound( + art_node48_t *node, art_key_chunk_t key_chunk) { + art_indexed_child_t indexed_child; + for (size_t i = key_chunk; i < 256; ++i) { + if (node->keys[i] != ART_NODE48_EMPTY_VAL) { + indexed_child.index = i; + indexed_child.key_chunk = node->keys[i]; + indexed_child.child = node->children[node->keys[i]]; + return indexed_child; + } + } + indexed_child.child = NULL; + return indexed_child; +} + +static art_node256_t *art_node256_create(const art_key_chunk_t prefix[], + uint8_t prefix_size) { + art_node256_t *node = roaring_malloc(sizeof(art_node256_t)); + art_init_inner_node((art_inner_node_t *)node, ART_NODE256_TYPE, prefix, + prefix_size); + node->count = 0; + for (size_t i = 0; i < 256; ++i) { + node->children[i] = NULL; + } + return node; +} + +static void art_free_node256(art_node256_t *node) { + for (size_t i = 0; i < 256; ++i) { + if (node->children[i] != NULL) { + art_free_node(node->children[i]); + } + } + roaring_free(node); +} + +static inline art_node_t *art_node256_find_child(const art_node256_t *node, + art_key_chunk_t key) { + return node->children[key]; +} + +static art_node_t *art_node256_insert(art_node256_t *node, art_node_t *child, + uint8_t key) { + node->children[key] = child; + node->count++; + return (art_node_t 
*)node; +} + +static inline art_node_t *art_node256_erase(art_node256_t *node, + uint8_t key_chunk) { + node->children[key_chunk] = NULL; + node->count--; + if (node->count > 48) { + return (art_node_t *)node; + } + + art_node48_t *new_node = + art_node48_create(node->base.prefix, node->base.prefix_size); + for (size_t i = 0; i < 256; ++i) { + if (node->children[i] != NULL) { + art_node48_insert(new_node, node->children[i], i); + } + } + roaring_free(node); + return (art_node_t *)new_node; +} + +static inline void art_node256_replace(art_node256_t *node, + art_key_chunk_t key_chunk, + art_node_t *new_child) { + node->children[key_chunk] = new_child; +} + +static inline art_indexed_child_t art_node256_next_child( + const art_node256_t *node, int index) { + art_indexed_child_t indexed_child; + index++; + for (size_t i = index; i < 256; ++i) { + if (node->children[i] != NULL) { + indexed_child.child = node->children[i]; + indexed_child.key_chunk = i; + indexed_child.index = i; + return indexed_child; + } + } + indexed_child.child = NULL; + return indexed_child; +} + +static inline art_indexed_child_t art_node256_prev_child( + const art_node256_t *node, int index) { + if (index > 256) { + index = 256; + } + index--; + art_indexed_child_t indexed_child; + for (int i = index; i > 0; --i) { + if (node->children[i] != NULL) { + indexed_child.child = node->children[i]; + indexed_child.key_chunk = i; + indexed_child.index = i; + return indexed_child; + } + } + indexed_child.child = NULL; + return indexed_child; +} + +static inline art_indexed_child_t art_node256_child_at( + const art_node256_t *node, int index) { + art_indexed_child_t indexed_child; + if (index < 0 || index >= 256) { + indexed_child.child = NULL; + return indexed_child; + } + indexed_child.index = index; + indexed_child.key_chunk = index; + indexed_child.child = node->children[index]; + return indexed_child; +} + +static inline art_indexed_child_t art_node256_lower_bound( + art_node256_t *node, art_key_chunk_t key_chunk) { + art_indexed_child_t indexed_child; + for (size_t i = key_chunk; i < 256; ++i) { + if (node->children[i] != NULL) { + indexed_child.index = i; + indexed_child.key_chunk = i; + indexed_child.child = node->children[i]; + return indexed_child; + } + } + indexed_child.child = NULL; + return indexed_child; +} + +// Finds the child with the given key chunk in the inner node, returns NULL if +// no such child is found. +static art_node_t *art_find_child(const art_inner_node_t *node, + art_key_chunk_t key_chunk) { + switch (art_get_type(node)) { + case ART_NODE4_TYPE: + return art_node4_find_child((art_node4_t *)node, key_chunk); + case ART_NODE16_TYPE: + return art_node16_find_child((art_node16_t *)node, key_chunk); + case ART_NODE48_TYPE: + return art_node48_find_child((art_node48_t *)node, key_chunk); + case ART_NODE256_TYPE: + return art_node256_find_child((art_node256_t *)node, key_chunk); + default: + assert(false); + return NULL; + } +} + +// Replaces the child with the given key chunk in the inner node. 
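+// Assumes a child with this key chunk is already present: only the child
+// pointer is overwritten, the node is never grown or shrunk.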
+static void art_replace(art_inner_node_t *node, art_key_chunk_t key_chunk, + art_node_t *new_child) { + switch (art_get_type(node)) { + case ART_NODE4_TYPE: + art_node4_replace((art_node4_t *)node, key_chunk, new_child); + break; + case ART_NODE16_TYPE: + art_node16_replace((art_node16_t *)node, key_chunk, new_child); + break; + case ART_NODE48_TYPE: + art_node48_replace((art_node48_t *)node, key_chunk, new_child); + break; + case ART_NODE256_TYPE: + art_node256_replace((art_node256_t *)node, key_chunk, new_child); + break; + default: + assert(false); + } +} + +// Erases the child with the given key chunk from the inner node, returns the +// updated node (the same as the initial node if it was not shrunk). +static art_node_t *art_node_erase(art_inner_node_t *node, + art_key_chunk_t key_chunk) { + switch (art_get_type(node)) { + case ART_NODE4_TYPE: + return art_node4_erase((art_node4_t *)node, key_chunk); + case ART_NODE16_TYPE: + return art_node16_erase((art_node16_t *)node, key_chunk); + case ART_NODE48_TYPE: + return art_node48_erase((art_node48_t *)node, key_chunk); + case ART_NODE256_TYPE: + return art_node256_erase((art_node256_t *)node, key_chunk); + default: + assert(false); + return NULL; + } +} + +// Inserts the leaf with the given key chunk in the inner node, returns a +// pointer to the (possibly expanded) node. +static art_node_t *art_node_insert_leaf(art_inner_node_t *node, + art_key_chunk_t key_chunk, + art_leaf_t *leaf) { + art_node_t *child = (art_node_t *)(SET_LEAF(leaf)); + switch (art_get_type(node)) { + case ART_NODE4_TYPE: + return art_node4_insert((art_node4_t *)node, child, key_chunk); + case ART_NODE16_TYPE: + return art_node16_insert((art_node16_t *)node, child, key_chunk); + case ART_NODE48_TYPE: + return art_node48_insert((art_node48_t *)node, child, key_chunk); + case ART_NODE256_TYPE: + return art_node256_insert((art_node256_t *)node, child, key_chunk); + default: + assert(false); + return NULL; + } +} + +// Frees the node and its children. Leaves are freed by the user. +static void art_free_node(art_node_t *node) { + if (art_is_leaf(node)) { + // We leave it up to the user to free leaves. + return; + } + switch (art_get_type((art_inner_node_t *)node)) { + case ART_NODE4_TYPE: + art_free_node4((art_node4_t *)node); + break; + case ART_NODE16_TYPE: + art_free_node16((art_node16_t *)node); + break; + case ART_NODE48_TYPE: + art_free_node48((art_node48_t *)node); + break; + case ART_NODE256_TYPE: + art_free_node256((art_node256_t *)node); + break; + default: + assert(false); + } +} + +// Returns the next child in key order, or NULL if called on a leaf. +// Provided index may be in the range [-1, 255]. +static art_indexed_child_t art_node_next_child(const art_node_t *node, + int index) { + if (art_is_leaf(node)) { + art_indexed_child_t indexed_child; + indexed_child.child = NULL; + return indexed_child; + } + switch (art_get_type((art_inner_node_t *)node)) { + case ART_NODE4_TYPE: + return art_node4_next_child((art_node4_t *)node, index); + case ART_NODE16_TYPE: + return art_node16_next_child((art_node16_t *)node, index); + case ART_NODE48_TYPE: + return art_node48_next_child((art_node48_t *)node, index); + case ART_NODE256_TYPE: + return art_node256_next_child((art_node256_t *)node, index); + default: + assert(false); + return (art_indexed_child_t){}; + } +} + +// Returns the previous child in key order, or NULL if called on a leaf. +// Provided index may be in the range [0, 256]. 
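+// Passing 256 starts from the last child, which is how reverse iteration is
+// initialized.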
+static art_indexed_child_t art_node_prev_child(const art_node_t *node, + int index) { + if (art_is_leaf(node)) { + art_indexed_child_t indexed_child; + indexed_child.child = NULL; + return indexed_child; + } + switch (art_get_type((art_inner_node_t *)node)) { + case ART_NODE4_TYPE: + return art_node4_prev_child((art_node4_t *)node, index); + case ART_NODE16_TYPE: + return art_node16_prev_child((art_node16_t *)node, index); + case ART_NODE48_TYPE: + return art_node48_prev_child((art_node48_t *)node, index); + case ART_NODE256_TYPE: + return art_node256_prev_child((art_node256_t *)node, index); + default: + assert(false); + return (art_indexed_child_t){}; + } +} + +// Returns the child found at the provided index, or NULL if called on a leaf. +// Provided index is only valid if returned by art_node_(next|prev)_child. +static art_indexed_child_t art_node_child_at(const art_node_t *node, + int index) { + if (art_is_leaf(node)) { + art_indexed_child_t indexed_child; + indexed_child.child = NULL; + return indexed_child; + } + switch (art_get_type((art_inner_node_t *)node)) { + case ART_NODE4_TYPE: + return art_node4_child_at((art_node4_t *)node, index); + case ART_NODE16_TYPE: + return art_node16_child_at((art_node16_t *)node, index); + case ART_NODE48_TYPE: + return art_node48_child_at((art_node48_t *)node, index); + case ART_NODE256_TYPE: + return art_node256_child_at((art_node256_t *)node, index); + default: + assert(false); + return (art_indexed_child_t){}; + } +} + +// Returns the child with the smallest key equal to or greater than the given +// key chunk, NULL if called on a leaf or no such child was found. +static art_indexed_child_t art_node_lower_bound(const art_node_t *node, + art_key_chunk_t key_chunk) { + if (art_is_leaf(node)) { + art_indexed_child_t indexed_child; + indexed_child.child = NULL; + return indexed_child; + } + switch (art_get_type((art_inner_node_t *)node)) { + case ART_NODE4_TYPE: + return art_node4_lower_bound((art_node4_t *)node, key_chunk); + case ART_NODE16_TYPE: + return art_node16_lower_bound((art_node16_t *)node, key_chunk); + case ART_NODE48_TYPE: + return art_node48_lower_bound((art_node48_t *)node, key_chunk); + case ART_NODE256_TYPE: + return art_node256_lower_bound((art_node256_t *)node, key_chunk); + default: + assert(false); + return (art_indexed_child_t){}; + } +} + +// ====================== End of node-specific functions ======================= + +// Compares the given ranges of two keys, returns their relative order: +// * Key range 1 < key range 2: a negative value +// * Key range 1 == key range 2: 0 +// * Key range 1 > key range 2: a positive value +static inline int art_compare_prefix(const art_key_chunk_t key1[], + uint8_t key1_from, uint8_t key1_to, + const art_key_chunk_t key2[], + uint8_t key2_from, uint8_t key2_to) { + uint8_t min_len = key1_to - key1_from; + uint8_t key2_len = key2_to - key2_from; + if (key2_len < min_len) { + min_len = key2_len; + } + return memcmp(key1 + key1_from, key2 + key2_from, min_len); +} + +// Compares two keys in full, see art_compare_prefix. +int art_compare_keys(const art_key_chunk_t key1[], + const art_key_chunk_t key2[]) { + return art_compare_prefix(key1, 0, ART_KEY_BYTES, key2, 0, ART_KEY_BYTES); +} + +// Returns the length of the common prefix between two key ranges. 
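+// For example, comparing {0x01, 0x02, 0x03} with {0x01, 0x02, 0xFF} over their
+// full ranges returns 2.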
+static uint8_t art_common_prefix(const art_key_chunk_t key1[], + uint8_t key1_from, uint8_t key1_to, + const art_key_chunk_t key2[], + uint8_t key2_from, uint8_t key2_to) { + uint8_t min_len = key1_to - key1_from; + uint8_t key2_len = key2_to - key2_from; + if (key2_len < min_len) { + min_len = key2_len; + } + uint8_t offset = 0; + for (; offset < min_len; ++offset) { + if (key1[key1_from + offset] != key2[key2_from + offset]) { + return offset; + } + } + return offset; +} + +// Returns a pointer to the rootmost node where the value was inserted, may not +// be equal to `node`. +static art_node_t *art_insert_at(art_node_t *node, const art_key_chunk_t key[], + uint8_t depth, art_leaf_t *new_leaf) { + if (art_is_leaf(node)) { + art_leaf_t *leaf = CAST_LEAF(node); + uint8_t common_prefix = art_common_prefix( + leaf->key, depth, ART_KEY_BYTES, key, depth, ART_KEY_BYTES); + + // Previously this was a leaf, create an inner node instead and add both + // the existing and new leaf to it. + art_node_t *new_node = + (art_node_t *)art_node4_create(key + depth, common_prefix); + + new_node = + art_node_insert_leaf((art_inner_node_t *)new_node, + *(leaf->key + depth + common_prefix), leaf); + new_node = + art_node_insert_leaf((art_inner_node_t *)new_node, + *(key + depth + common_prefix), new_leaf); + + // The new inner node is now the rootmost node. + return new_node; + } + art_inner_node_t *inner_node = (art_inner_node_t *)node; + // Not a leaf: inner node + uint8_t common_prefix = + art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, key, + depth, ART_KEY_BYTES); + if (common_prefix != inner_node->prefix_size) { + // Partial prefix match. Create a new internal node to hold the common + // prefix. + art_node4_t *node4 = + art_node4_create(inner_node->prefix, common_prefix); + + // Make the existing internal node a child of the new internal node. + node4 = (art_node4_t *)art_node4_insert( + node4, node, + *(art_key_chunk_t *)(inner_node->prefix + common_prefix)); + + // Correct the prefix of the moved internal node, trimming off the chunk + // inserted into the new internal node. + inner_node->prefix_size = inner_node->prefix_size - common_prefix - 1; + if (inner_node->prefix_size > 0) { + // Move the remaining prefix to the correct position. + memmove(inner_node->prefix, inner_node->prefix + common_prefix + 1, + inner_node->prefix_size); + } + + // Insert the value in the new internal node. + return art_node_insert_leaf( + (art_inner_node_t *)node4, + *(art_key_chunk_t *)(key + common_prefix + depth), new_leaf); + } + // Prefix matches entirely or node has no prefix. Look for an existing + // child. + art_key_chunk_t key_chunk = *(key + depth + common_prefix); + art_node_t *child = art_find_child(inner_node, key_chunk); + if (child != NULL) { + art_node_t *new_child = + art_insert_at(child, key, depth + common_prefix + 1, new_leaf); + if (new_child != child) { + // Node type changed. + art_replace(inner_node, key_chunk, new_child); + } + return node; + } + return art_node_insert_leaf(inner_node, key_chunk, new_leaf); +} + +// Erase helper struct. +typedef struct art_erase_result_s { + // The rootmost node where the value was erased, may not be equal to `node`. + // If no value was removed, this is null. + art_node_t *rootmost_node; + + // Value removed, null if not removed. + art_val_t *value_erased; +} art_erase_result_t; + +// Searches for the given key starting at `node`, erases it if found. 
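+// The erase recurses down to the leaf and shrinks or removes inner nodes on
+// the way back up, so the returned rootmost node may differ from `node`.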
+static art_erase_result_t art_erase_at(art_node_t *node, + const art_key_chunk_t *key, + uint8_t depth) { + art_erase_result_t result; + result.rootmost_node = NULL; + result.value_erased = NULL; + + if (art_is_leaf(node)) { + art_leaf_t *leaf = CAST_LEAF(node); + uint8_t common_prefix = art_common_prefix(leaf->key, 0, ART_KEY_BYTES, + key, 0, ART_KEY_BYTES); + if (common_prefix != ART_KEY_BYTES) { + // Leaf key mismatch. + return result; + } + result.value_erased = (art_val_t *)leaf; + return result; + } + art_inner_node_t *inner_node = (art_inner_node_t *)node; + uint8_t common_prefix = + art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, key, + depth, ART_KEY_BYTES); + if (common_prefix != inner_node->prefix_size) { + // Prefix mismatch. + return result; + } + art_key_chunk_t key_chunk = *(key + depth + common_prefix); + art_node_t *child = art_find_child(inner_node, key_chunk); + if (child == NULL) { + // No child with key chunk. + return result; + } + // Try to erase the key further down. Skip the key chunk associated with the + // child in the node. + art_erase_result_t child_result = + art_erase_at(child, key, depth + common_prefix + 1); + if (child_result.value_erased == NULL) { + return result; + } + result.value_erased = child_result.value_erased; + result.rootmost_node = node; + if (child_result.rootmost_node == NULL) { + // Child node was fully erased, erase it from this node's children. + result.rootmost_node = art_node_erase(inner_node, key_chunk); + } else if (child_result.rootmost_node != child) { + // Child node was not fully erased, update the pointer to it in this + // node. + art_replace(inner_node, key_chunk, child_result.rootmost_node); + } + return result; +} + +// Searches for the given key starting at `node`, returns NULL if the key was +// not found. +static art_val_t *art_find_at(const art_node_t *node, + const art_key_chunk_t *key, uint8_t depth) { + while (!art_is_leaf(node)) { + art_inner_node_t *inner_node = (art_inner_node_t *)node; + uint8_t common_prefix = + art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, + key, depth, ART_KEY_BYTES); + if (common_prefix != inner_node->prefix_size) { + return NULL; + } + art_node_t *child = art_find_child( + inner_node, + *(art_key_chunk_t *)(key + depth + inner_node->prefix_size)); + if (child == NULL) { + return NULL; + } + node = child; + // Include both the prefix and the child key chunk in the depth. + depth += inner_node->prefix_size + 1; + } + art_leaf_t *leaf = CAST_LEAF(node); + if (depth >= ART_KEY_BYTES - 1) { + return (art_val_t *)leaf; + } + uint8_t common_prefix = + art_common_prefix(leaf->key, 0, ART_KEY_BYTES, key, 0, ART_KEY_BYTES); + if (common_prefix == ART_KEY_BYTES) { + return (art_val_t *)leaf; + } + return NULL; +} + +// Returns the size in bytes of the subtrie. 
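+// Leaves contribute 0 bytes: they are embedded in user values, so only inner
+// nodes (including their child pointer arrays) are counted here.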
+size_t art_size_in_bytes_at(const art_node_t *node) { + if (art_is_leaf(node)) { + return 0; + } + size_t size = 0; + switch (art_get_type((art_inner_node_t *)node)) { + case ART_NODE4_TYPE: { + size += sizeof(art_node4_t); + } break; + case ART_NODE16_TYPE: { + size += sizeof(art_node16_t); + } break; + case ART_NODE48_TYPE: { + size += sizeof(art_node48_t); + } break; + case ART_NODE256_TYPE: { + size += sizeof(art_node256_t); + } break; + default: + assert(false); + break; + } + art_indexed_child_t indexed_child = art_node_next_child(node, -1); + while (indexed_child.child != NULL) { + size += art_size_in_bytes_at(indexed_child.child); + indexed_child = art_node_next_child(node, indexed_child.index); + } + return size; +} + +static void art_node_print_type(const art_node_t *node) { + if (art_is_leaf(node)) { + printf("Leaf"); + return; + } + switch (art_get_type((art_inner_node_t *)node)) { + case ART_NODE4_TYPE: + printf("Node4"); + return; + case ART_NODE16_TYPE: + printf("Node16"); + return; + case ART_NODE48_TYPE: + printf("Node48"); + return; + case ART_NODE256_TYPE: + printf("Node256"); + return; + default: + assert(false); + return; + } +} + +void art_node_printf(const art_node_t *node, uint8_t depth) { + printf("%*s", depth, ""); + printf("{\n"); + depth++; + + printf("%*s", depth, ""); + printf("type: "); + art_node_print_type(node); + printf("\n"); + + if (art_is_leaf(node)) { + art_leaf_t *leaf = CAST_LEAF(node); + printf("%*s", depth, ""); + printf("key: "); + for (size_t i = 0; i < ART_KEY_BYTES; ++i) { + printf("%x", leaf->key[i]); + } + printf("\n"); + depth--; + printf("%*s", depth, ""); + printf("}\n"); + return; + } + art_inner_node_t *inner_node = (art_inner_node_t *)node; + printf("%*s", depth, ""); + printf("prefix_size: %d\n", inner_node->prefix_size); + + printf("%*s", depth, ""); + printf("prefix: "); + for (uint8_t i = 0; i < inner_node->prefix_size; ++i) { + printf("%x", (char)*(inner_node->prefix + i)); + } + printf("\n"); + + switch (art_get_type(inner_node)) { + case ART_NODE4_TYPE: { + art_node4_t *node4 = (art_node4_t *)node; + for (uint8_t i = 0; i < node4->count; ++i) { + printf("%*s", depth, ""); + printf("key: %x\n", node4->keys[i]); + art_node_printf(node4->children[i], depth); + } + } break; + case ART_NODE16_TYPE: { + art_node16_t *node16 = (art_node16_t *)node; + for (uint8_t i = 0; i < node16->count; ++i) { + printf("%*s", depth, ""); + printf("key: %x\n", node16->keys[i]); + art_node_printf(node16->children[i], depth); + } + } break; + case ART_NODE48_TYPE: { + art_node48_t *node48 = (art_node48_t *)node; + for (int i = 0; i < 256; ++i) { + if (node48->keys[i] != ART_NODE48_EMPTY_VAL) { + printf("%*s", depth, ""); + printf("key: %x\n", node48->keys[i]); + art_node_printf(node48->children[i], depth); + } + } + } break; + case ART_NODE256_TYPE: { + art_node256_t *node256 = (art_node256_t *)node; + for (int i = 0; i < 256; ++i) { + if (node256->children[i] != NULL) { + printf("%*s", depth, ""); + printf("key: %x\n", i); + art_node_printf(node256->children[i], depth); + } + } + } break; + default: + assert(false); + break; + } + depth--; + printf("%*s", depth, ""); + printf("}\n"); +} + +void art_insert(art_t *art, const art_key_chunk_t *key, art_val_t *val) { + art_leaf_t *leaf = (art_leaf_t *)val; + art_leaf_populate(leaf, key); + if (art->root == NULL) { + art->root = (art_node_t *)SET_LEAF(leaf); + return; + } + art->root = art_insert_at(art->root, key, 0, leaf); +} + +art_val_t *art_erase(art_t *art, const art_key_chunk_t *key) { + if 
(art->root == NULL) { + return NULL; + } + art_erase_result_t result = art_erase_at(art->root, key, 0); + if (result.value_erased == NULL) { + return NULL; + } + art->root = result.rootmost_node; + return result.value_erased; +} + +art_val_t *art_find(const art_t *art, const art_key_chunk_t *key) { + if (art->root == NULL) { + return NULL; + } + return art_find_at(art->root, key, 0); +} + +bool art_is_empty(const art_t *art) { return art->root == NULL; } + +void art_free(art_t *art) { + if (art->root == NULL) { + return; + } + art_free_node(art->root); +} + +size_t art_size_in_bytes(const art_t *art) { + size_t size = sizeof(art_t); + if (art->root != NULL) { + size += art_size_in_bytes_at(art->root); + } + return size; +} + +void art_printf(const art_t *art) { + if (art->root == NULL) { + return; + } + art_node_printf(art->root, 0); +} + +// Returns the current node that the iterator is positioned at. +static inline art_node_t *art_iterator_node(art_iterator_t *iterator) { + return iterator->frames[iterator->frame].node; +} + +// Sets the iterator key and value to the leaf's key and value. Always returns +// true for convenience. +static inline bool art_iterator_valid_loc(art_iterator_t *iterator, + art_leaf_t *leaf) { + iterator->frames[iterator->frame].node = SET_LEAF(leaf); + iterator->frames[iterator->frame].index_in_node = 0; + memcpy(iterator->key, leaf->key, ART_KEY_BYTES); + iterator->value = (art_val_t *)leaf; + return true; +} + +// Invalidates the iterator key and value. Always returns false for convenience. +static inline bool art_iterator_invalid_loc(art_iterator_t *iterator) { + memset(iterator->key, 0, ART_KEY_BYTES); + iterator->value = NULL; + return false; +} + +// Moves the iterator one level down in the tree, given a node at the current +// level and the index of the child that we're going down to. +// +// Note: does not set the index at the new level. +static void art_iterator_down(art_iterator_t *iterator, + const art_inner_node_t *node, + uint8_t index_in_node) { + iterator->frames[iterator->frame].node = (art_node_t *)node; + iterator->frames[iterator->frame].index_in_node = index_in_node; + iterator->frame++; + art_indexed_child_t indexed_child = + art_node_child_at((art_node_t *)node, index_in_node); + assert(indexed_child.child != NULL); + iterator->frames[iterator->frame].node = indexed_child.child; + iterator->depth += node->prefix_size + 1; +} + +// Moves the iterator to the next/previous child of the current node. Returns +// the child moved to, or NULL if there is no neighboring child. +static art_node_t *art_iterator_neighbor_child( + art_iterator_t *iterator, const art_inner_node_t *inner_node, + bool forward) { + art_iterator_frame_t frame = iterator->frames[iterator->frame]; + art_indexed_child_t indexed_child; + if (forward) { + indexed_child = art_node_next_child(frame.node, frame.index_in_node); + } else { + indexed_child = art_node_prev_child(frame.node, frame.index_in_node); + } + if (indexed_child.child != NULL) { + art_iterator_down(iterator, inner_node, indexed_child.index); + } + return indexed_child.child; +} + +// Moves the iterator one level up in the tree, returns false if not possible. +static bool art_iterator_up(art_iterator_t *iterator) { + if (iterator->frame == 0) { + return false; + } + iterator->frame--; + // We went up, so we are at an inner node. + iterator->depth -= + ((art_inner_node_t *)art_iterator_node(iterator))->prefix_size + 1; + return true; +} + +// Initializes the iterator at the first / last leaf of the given node. 
+// Returns true for convenience. +static bool art_node_init_iterator(const art_node_t *node, + art_iterator_t *iterator, bool first) { + while (!art_is_leaf(node)) { + art_indexed_child_t indexed_child; + if (first) { + indexed_child = art_node_next_child(node, -1); + } else { + indexed_child = art_node_prev_child(node, 256); + } + art_iterator_down(iterator, (art_inner_node_t *)node, + indexed_child.index); + node = indexed_child.child; + } + // We're at a leaf. + iterator->frames[iterator->frame].node = (art_node_t *)node; + iterator->frames[iterator->frame].index_in_node = 0; // Should not matter. + return art_iterator_valid_loc(iterator, CAST_LEAF(node)); +} + +bool art_iterator_move(art_iterator_t *iterator, bool forward) { + if (art_is_leaf(art_iterator_node(iterator))) { + bool went_up = art_iterator_up(iterator); + if (!went_up) { + // This leaf is the root, we're done. + return art_iterator_invalid_loc(iterator); + } + } + // Advance within inner node. + art_node_t *neighbor_child = art_iterator_neighbor_child( + iterator, (art_inner_node_t *)art_iterator_node(iterator), forward); + if (neighbor_child != NULL) { + // There is another child at this level, go down to the first or last + // leaf. + return art_node_init_iterator(neighbor_child, iterator, forward); + } + // No more children at this level, go up. + bool went_up = art_iterator_up(iterator); + if (!went_up) { + // We're at the root. + return art_iterator_invalid_loc(iterator); + } + return art_iterator_move(iterator, forward); +} + +// Assumes the iterator is positioned at a node with an equal prefix path up to +// the depth of the iterator. +static bool art_node_iterator_lower_bound(const art_node_t *node, + art_iterator_t *iterator, + const art_key_chunk_t *key) { + while (!art_is_leaf(node)) { + art_inner_node_t *inner_node = (art_inner_node_t *)node; + int prefix_comparison = art_compare_prefix( + inner_node->prefix, 0, inner_node->prefix_size, key, + iterator->depth, iterator->depth + inner_node->prefix_size); + if (prefix_comparison < 0) { + // Prefix so far has been equal, but we've found a smaller key. + // Since we take the lower bound within each node, we can return the + // next leaf. + return art_iterator_move(iterator, true); + } else if (prefix_comparison > 0) { + // No key equal to the key we're looking for, return the first leaf. + return art_node_init_iterator(node, iterator, true); + } + // Prefix is equal, move to lower bound child. + art_key_chunk_t key_chunk = + *(key + iterator->depth + inner_node->prefix_size); + art_indexed_child_t indexed_child = + art_node_lower_bound(node, key_chunk); + if (indexed_child.child == NULL) { + // Only smaller keys among children. + bool went_up = art_iterator_up(iterator); + if (!went_up) { + return art_iterator_invalid_loc(iterator); + } + return art_iterator_move(iterator, true); + } + // We found a child with a greater or equal prefix. + art_iterator_down(iterator, inner_node, indexed_child.index); + node = indexed_child.child; + } + art_leaf_t *leaf = CAST_LEAF(node); + // Technically we don't have to re-compare the prefix if we arrived here + // through the while loop, but it simplifies the code. 
+ if (art_compare_prefix(leaf->key, 0, ART_KEY_BYTES, key, 0, + ART_KEY_BYTES) >= 0) { + return art_iterator_valid_loc(iterator, leaf); + } + return art_iterator_invalid_loc(iterator); +} + +art_iterator_t art_create_iterator() { + art_iterator_t iterator; + iterator.depth = 0; + iterator.frame = 0; + memset(iterator.key, 0, ART_KEY_BYTES); + iterator.value = NULL; + return iterator; +} + +void art_init_iterator(const art_t *art, art_iterator_t *iterator, bool first) { + if (art->root == NULL) { + return; + } + art_node_init_iterator(art->root, iterator, first); +} + +bool art_iterator_next(art_iterator_t *iterator) { + return art_iterator_move(iterator, true); +} + +bool art_iterator_prev(art_iterator_t *iterator) { + return art_iterator_move(iterator, false); +} + +bool art_iterator_lower_bound(art_iterator_t *iterator, + const art_key_chunk_t *key) { + int compare_result = art_compare_prefix(iterator->key, 0, ART_KEY_BYTES, + key, 0, ART_KEY_BYTES); + // Move up until we have an equal or greater prefix, after which we can do a + // normal lower bound search. + while (compare_result < 0 && iterator->frame > 0) { + if (!art_iterator_up(iterator)) { + // Only smaller keys found. + return art_node_iterator_lower_bound(art_iterator_node(iterator), + iterator, key); + } + // Since we're only moving up, we can keep comparing against the + // iterator key. + compare_result = art_compare_prefix(iterator->key, 0, iterator->depth, + key, 0, iterator->depth); + } + if (compare_result > 0) { + return art_node_init_iterator(art_iterator_node(iterator), iterator, + true); + } + return art_node_iterator_lower_bound(art_iterator_node(iterator), iterator, + key); +} + +art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key) { + art_iterator_t iterator = art_create_iterator(); + if (art->root != NULL) { + art_node_iterator_lower_bound(art->root, &iterator, key); + } + return iterator; +} + +art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key) { + art_iterator_t iterator = art_create_iterator(); + if (art->root != NULL) { + if (art_node_iterator_lower_bound(art->root, &iterator, key) && + art_compare_prefix(iterator.key, 0, ART_KEY_BYTES, key, 0, + ART_KEY_BYTES) == 0) { + art_iterator_next(&iterator); + } + } + return iterator; +} + +void art_iterator_insert(art_t *art, art_iterator_t *iterator, + const art_key_chunk_t *key, art_val_t *val) { + // TODO: This can likely be faster. + art_insert(art, key, val); + assert(art->root != NULL); + iterator->frame = 0; + iterator->depth = 0; + art_node_iterator_lower_bound(art->root, iterator, key); +} + +// TODO: consider keeping `art_t *art` in the iterator. +art_val_t *art_iterator_erase(art_t *art, art_iterator_t *iterator) { + if (iterator->value == NULL) { + return NULL; + } + art_val_t *value_erased = iterator->value; + bool went_up = art_iterator_up(iterator); + if (!went_up) { + art->root = NULL; + art_iterator_invalid_loc(iterator); + return value_erased; + } + + // Erase the leaf. + art_node_t *child_to_replace; + { + art_inner_node_t *node = + (art_inner_node_t *)art_iterator_node(iterator); + art_key_chunk_t key_chunk = + iterator->key[iterator->depth + node->prefix_size]; + child_to_replace = art_node_erase(node, key_chunk); + } + + // Replace the pointer to the inner node we erased from in its parent (it + // may be a leaf now). 
+ went_up = art_iterator_up(iterator); + if (went_up) { + art_inner_node_t *node = + (art_inner_node_t *)art_iterator_node(iterator); + art_key_chunk_t key_chunk = + iterator->key[iterator->depth + node->prefix_size]; + art_replace(node, key_chunk, child_to_replace); + } else { + // This node was the rootmost node. + art->root = child_to_replace; + iterator->frames[0].node = child_to_replace; + } + art_key_chunk_t initial_key[ART_KEY_BYTES]; + memcpy(initial_key, iterator->key, ART_KEY_BYTES); + // Search for the first key after the one we erased. + art_iterator_lower_bound(iterator, initial_key); + return value_erased; +} + +#ifdef __cplusplus +} // extern "C" +} // namespace roaring +} // namespace internal +#endif + diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 61f540112..673a7bd7f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -20,6 +20,7 @@ add_c_test(format_portability_unit) add_c_test(robust_deserialization_unit) add_c_test(container_comparison_unit) add_c_test(add_offset) +add_cpp_test(art_unit) find_package(Threads) if(Threads_FOUND) message(STATUS "Your system supports threads.") diff --git a/tests/art_unit.cpp b/tests/art_unit.cpp new file mode 100644 index 000000000..05144ac4d --- /dev/null +++ b/tests/art_unit.cpp @@ -0,0 +1,502 @@ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test.h" + +using namespace roaring::internal; + +namespace { + +void print_key(art_key_chunk_t* key) { + for (size_t i = 0; i < ART_KEY_BYTES; ++i) { + printf("%x", *(key + i)); + } +} + +void assert_key_eq(art_key_chunk_t* key1, art_key_chunk_t* key2) { + for (size_t i = 0; i < ART_KEY_BYTES; ++i) { + if (*(key1 + i) != *(key2 + i)) { + print_key(key1); + printf(" != "); + print_key(key2); + printf("\n"); + + assert_true(false); + } + } +} + +class Key { + public: + Key(uint64_t key) { + // Reverse byte order of the low 6 bytes. Not portable to big-endian + // systems! 
+ key_[0] = key >> 40 & 0xFF; + key_[1] = key >> 32 & 0xFF; + key_[2] = key >> 24 & 0xFF; + key_[3] = key >> 16 & 0xFF; + key_[4] = key >> 8 & 0xFF; + key_[5] = key >> 0 & 0xFF; + } + + Key(uint8_t* key) { + for (size_t i = 0; i < 6; ++i) { + key_[i] = *(key + i); + } + } + + bool operator==(const Key& other) const { return key_ == other.key_; } + bool operator!=(const Key& other) const { return !(*this == other); } + bool operator<(const Key& other) const { return key_ < other.key_; } + bool operator>(const Key& other) const { return key_ > other.key_; } + + const uint8_t* data() const { return key_.data(); } + + std::string string() const { + std::stringstream os; + os << std::hex << std::setfill('0'); + for (size_t i = 0; i < 6; ++i) { + os << std::setw(2) << static_cast(key_[i]) << " "; + } + return os.str(); + } + + private: + std::array key_; +}; + +struct Value : art_val_t { + Value() {} + Value(uint64_t val_) : val(val_) {} + bool operator==(const Value& other) { return val == other.val; } + + uint64_t val; +}; + +class ShadowedART { + public: + ~ShadowedART() { art_free(&art_); } + + void insert(Key key, Value value) { + shadow_[key] = value; + art_insert(&art_, key.data(), &shadow_[key]); + } + + void erase(Key key) { + art_erase(&art_, key.data()); + shadow_.erase(key); + } + + void assertLowerBoundValid(Key key) { + auto shadow_it = shadow_.lower_bound(key); + auto art_it = art_lower_bound(&art_, key.data()); + assertIteratorValid(shadow_it, &art_it); + } + + void assertUpperBoundValid(Key key) { + auto shadow_it = shadow_.upper_bound(key); + auto art_it = art_upper_bound(&art_, key.data()); + assertIteratorValid(shadow_it, &art_it); + } + + void assertValid() { + for (const auto& entry : shadow_) { + auto& key = entry.first; + auto& value = entry.second; + Value* found_val = (Value*)art_find(&art_, key.data()); + if (found_val == nullptr) { + printf("Key %s is not null in shadow but null in ART\n", + key.string().c_str()); + assert_true(found_val != nullptr); + break; + } + if (found_val->val != value.val) { + printf("Key %s: ART value %lu != shadow value %lu\n", + key.string().c_str(), found_val->val, value.val); + assert_true(*found_val == value); + break; + } + } + } + + private: + void assertIteratorValid(std::map::iterator& shadow_it, + art_iterator_t* art_it) { + if (shadow_it != shadow_.end() && art_it->value == nullptr) { + printf("Iterator for key %s is null\n", + shadow_it->first.string().c_str()); + assert_true(art_it->value != nullptr); + } + if (shadow_it == shadow_.end() && art_it->value != nullptr) { + printf("Iterator is not null\n"); + assert_true(art_it->value == nullptr); + } + if (shadow_it != shadow_.end() && + shadow_it->first != Key(art_it->key)) { + printf("Shadow iterator key = %s, ART key = %s\n", + shadow_it->first.string().c_str(), + Key(art_it->key).string().c_str()); + assert_true(shadow_it->first == Key(art_it->key)); + } + } + std::map shadow_; + art_t art_{NULL}; +}; + +DEFINE_TEST(test_art_simple) { + std::vector keys = { + "000001", "000002", "000003", "000004", "001005", + }; + std::vector values = {{1}, {2}, {3}, {4}, {5}}; + + art_t art{NULL}; + for (size_t i = 0; i < keys.size(); ++i) { + art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]); + } + Value* found_val = (Value*)art_find(&art, (uint8_t*)keys[0]); + assert_true(*found_val == values[0]); + Value* erased_val = (Value*)art_erase(&art, (uint8_t*)keys[0]); + assert_true(*erased_val == values[0]); + art_free(&art); +} + +DEFINE_TEST(test_art_erase_all) { + std::vector keys = 
{"000001", "000002"}; + std::vector values = {{1}, {2}}; + + art_t art{NULL}; + art_insert(&art, (uint8_t*)keys[0], &values[0]); + art_insert(&art, (uint8_t*)keys[1], &values[1]); + + Value* erased_val1 = (Value*)art_erase(&art, (uint8_t*)keys[0]); + Value* erased_val2 = (Value*)art_erase(&art, (uint8_t*)keys[1]); + assert_true(*erased_val1 == values[0]); + assert_true(*erased_val2 == values[1]); + art_free(&art); +} + +DEFINE_TEST(test_art_many_entries) { + const size_t kValues = 10000; + std::vector> keys; + std::vector values; + keys.reserve(kValues); + values.reserve(kValues); + for (uint64_t i = 0; i < kValues; ++i) { + char key[7]; + snprintf(key, 7, "%.6lu", i); + for (size_t j = 0; j < 6; ++j) { + keys[i][j] = key[j]; + } + values[i] = {i}; + } + + art_t art{NULL}; + + for (size_t i = 0; i < kValues; ++i) { + art_insert(&art, (uint8_t*)keys[i].data(), &values[i]); + } + + for (size_t i = 0; i < kValues; ++i) { + Value* found = (Value*)art_find(&art, (uint8_t*)keys[i].data()); + assert_true(found != NULL && *found == values[i]); + } + + for (size_t i = 0; i < kValues; ++i) { + Value* erased = (Value*)art_erase(&art, (uint8_t*)keys[i].data()); + assert_true(erased != NULL && *erased == values[i]); + } + + art_free(&art); +} + +DEFINE_TEST(test_art_dense_entries) { + const size_t kValues = 300; + std::vector keys; + std::vector values; + keys.reserve(kValues); + values.reserve(kValues); + for (uint64_t i = 0; i < kValues; ++i) { + uint64_t key = i; + uint8_t* key_begin = (uint8_t*)&key; + uint8_t* key_end = key_begin + sizeof(key); + std::reverse(key_begin, key_end); + key = key >> 16; + keys[i] = key; + values[i] = {i}; + } + + art_t art{NULL}; + + for (size_t i = 0; i < kValues; ++i) { + art_insert(&art, (uint8_t*)&keys[i], &values[i]); + } + + for (size_t i = 0; i < kValues; ++i) { + Value* found = (Value*)art_find(&art, (uint8_t*)&keys[i]); + assert_true(found != NULL && *found == values[i]); + } + + for (size_t i = 0; i < kValues; ++i) { + Value* erased = (Value*)art_erase(&art, (uint8_t*)&keys[i]); + assert_true(erased != NULL && *erased == values[i]); + } + + art_free(&art); +} + +DEFINE_TEST(test_art_is_empty) { + std::vector keys = { + "000001", "000002", "000003", "000004", "001005", + }; + std::vector values = {{1}, {2}, {3}, {4}, {5}}; + + art_t art{NULL}; + assert_true(art_is_empty(&art)); + const char* key = "000001"; + Value val{1}; + art_insert(&art, (art_key_chunk_t*)key, &val); + assert_false(art_is_empty(&art)); + art_free(&art); +} + +DEFINE_TEST(test_art_iterator_next) { + std::vector keys = { + "000001", "000002", "000003", "000004", "001005", + }; + std::vector values = {{1}, {2}, {3}, {4}, {5}}; + art_t art{NULL}; + for (size_t i = 0; i < keys.size(); ++i) { + art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]); + } + + art_iterator_t iterator = art_create_iterator(); + art_init_iterator(&art, &iterator, true); + size_t i = 0; + do { + assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i]); + assert_true(iterator.value == &values[i]); + ++i; + } while (art_iterator_next(&iterator)); + art_free(&art); +} + +DEFINE_TEST(test_art_iterator_prev) { + std::vector keys = { + "000001", "000002", "000003", "000004", "001005", + }; + std::vector values = {{1}, {2}, {3}, {4}, {5}}; + art_t art{NULL}; + for (size_t i = 0; i < keys.size(); ++i) { + art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]); + } + + art_iterator_t iterator = art_create_iterator(); + art_init_iterator(&art, &iterator, false); + size_t i = keys.size() - 1; + do { + assert_key_eq(iterator.key, 
(art_key_chunk_t*)keys[i]); + --i; + } while (art_iterator_prev(&iterator)); + art_free(&art); +} + +DEFINE_TEST(test_art_iterator_lower_bound) { + std::vector keys = { + "000001", "000002", "000003", "000004", "001005", + }; + std::vector values = {{1}, {2}, {3}, {4}, {5}}; + art_t art{NULL}; + for (size_t i = 0; i < keys.size(); ++i) { + art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]); + } + + art_iterator_t iterator = art_create_iterator(); + art_init_iterator(&art, &iterator, true); + assert_true(art_iterator_lower_bound(&iterator, (art_key_chunk_t*)keys[2])); + assert_key_eq(iterator.key, (art_key_chunk_t*)keys[2]); + const char* key = "000005"; + assert_true(art_iterator_lower_bound(&iterator, (art_key_chunk_t*)key)); + assert_key_eq(iterator.key, (art_key_chunk_t*)keys[4]); + art_free(&art); +} + +DEFINE_TEST(test_art_lower_bound) { + std::vector keys = { + "000001", "000002", "000003", "000004", "001005", + }; + std::vector values = {{1}, {2}, {3}, {4}, {5}}; + art_t art{NULL}; + for (size_t i = 0; i < keys.size(); ++i) { + art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]); + } + + { + const char* key = "000002"; + art_iterator_t iterator = art_lower_bound(&art, (art_key_chunk_t*)key); + size_t i = 1; + do { + assert_true(iterator.value != NULL); + assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i]); + assert_true(iterator.value == &values[i]); + ++i; + } while (art_iterator_next(&iterator)); + } + { + const char* key = "000005"; + art_iterator_t iterator = art_lower_bound(&art, (art_key_chunk_t*)key); + assert_true(iterator.value != NULL); + assert_key_eq(iterator.key, (art_key_chunk_t*)keys[4]); + assert_true(iterator.value == &values[4]); + assert_false(art_iterator_next(&iterator)); + } + { + const char* key = "001006"; + art_iterator_t iterator = art_lower_bound(&art, (art_key_chunk_t*)key); + assert_true(iterator.value == NULL); + } + art_free(&art); +} + +DEFINE_TEST(test_art_upper_bound) { + std::vector keys = { + "000001", "000002", "000003", "000004", "001005", + }; + std::vector values = {{1}, {2}, {3}, {4}, {5}}; + art_t art{NULL}; + for (size_t i = 0; i < keys.size(); ++i) { + art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]); + } + + { + const char* key = "000002"; + art_iterator_t iterator = art_upper_bound(&art, (art_key_chunk_t*)key); + size_t i = 2; + do { + assert_true(iterator.value != NULL); + assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i]); + assert_true(iterator.value == &values[i]); + ++i; + } while (art_iterator_next(&iterator)); + } + { + const char* key = "000005"; + art_iterator_t iterator = art_upper_bound(&art, (art_key_chunk_t*)key); + assert_true(iterator.value != NULL); + assert_key_eq(iterator.key, (art_key_chunk_t*)keys[4]); + assert_true(iterator.value == &values[4]); + assert_false(art_iterator_next(&iterator)); + } + { + const char* key = "001006"; + art_iterator_t iterator = art_upper_bound(&art, (art_key_chunk_t*)key); + assert_true(iterator.value == NULL); + } + art_free(&art); +} + +DEFINE_TEST(test_art_iterator_erase) { + std::vector keys = { + "000001", "000002", "000003", "000004", "001005", + }; + std::vector values = {{1}, {2}, {3}, {4}, {5}}; + art_t art{NULL}; + for (size_t i = 0; i < keys.size(); ++i) { + art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]); + } + art_iterator_t iterator = art_create_iterator(); + art_init_iterator(&art, &iterator, true); + size_t i = 0; + do { + assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i]); + assert_true(iterator.value == &values[i]); + 
assert_true(art_iterator_erase(&art, &iterator) == &values[i]); + assert_false(art_find(&art, (art_key_chunk_t*)keys[i])); + ++i; + } while (iterator.value != NULL); + assert_true(i == 5); + art_free(&art); +} + +DEFINE_TEST(test_art_iterator_insert) { + std::vector keys = { + "000001", "000002", "000003", "000004", "001005", + }; + std::vector values = {{1}, {2}, {3}, {4}, {5}}; + art_t art{NULL}; + art_insert(&art, (art_key_chunk_t*)keys[0], &values[0]); + art_iterator_t iterator = art_create_iterator(); + art_init_iterator(&art, &iterator, true); + for (size_t i = 1; i < keys.size(); ++i) { + art_iterator_insert(&art, &iterator, (art_key_chunk_t*)keys[i], + &values[i]); + assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i]); + assert_true(iterator.value == &values[i]); + } + art_free(&art); +} + +DEFINE_TEST(test_art_shadowed) { + ShadowedART art; + for (uint64_t i = 0; i < 10000; ++i) { + art.insert(i, i); + } + art.assertValid(); + art.assertLowerBoundValid(5000); + art.assertLowerBoundValid(10000); + for (uint64_t i = 0; i < 10000; ++i) { + art.erase(i); + } + art.assertValid(); + art.assertLowerBoundValid(1); +} + +DEFINE_TEST(test_art_size_in_bytes) { + std::vector keys = { + "000001", "000002", "000003", "000004", "001005", + }; + std::vector values = {{1}, {2}, {3}, {4}, {5}}; + + art_t art{NULL}; + for (size_t i = 0; i < keys.size(); ++i) { + art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]); + } + // sizeof(art_t) = 8 + // sizeof(art_node4_t) * 2 = 48 * 2 + // -------------------------------- + // total size = 104 + assert_true(art_size_in_bytes(&art) == 104); + art_free(&art); +} + +} // namespace + +int main() { + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_art_simple), + cmocka_unit_test(test_art_erase_all), + cmocka_unit_test(test_art_many_entries), + cmocka_unit_test(test_art_dense_entries), + cmocka_unit_test(test_art_is_empty), + cmocka_unit_test(test_art_iterator_next), + cmocka_unit_test(test_art_iterator_prev), + cmocka_unit_test(test_art_iterator_lower_bound), + cmocka_unit_test(test_art_lower_bound), + cmocka_unit_test(test_art_upper_bound), + cmocka_unit_test(test_art_iterator_erase), + cmocka_unit_test(test_art_iterator_insert), + cmocka_unit_test(test_art_shadowed), + cmocka_unit_test(test_art_size_in_bytes), + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} + From eac2663946c4484f82a18220bbbb4d3001b343a9 Mon Sep 17 00:00:00 2001 From: Soerian Lieve Date: Sat, 30 Dec 2023 12:49:53 +0100 Subject: [PATCH 02/11] Add a 64 bit version of roaring bitmaps in C The idea is copied from the Java version of Roaring Bitmaps: use an Adaptive Radix Trie (ART) to store the high 48 bits of each entry, which storing the low 16 bits in containers. --- include/roaring/roaring64.h | 427 +++++++++++ src/CMakeLists.txt | 1 + src/roaring64.c | 1358 +++++++++++++++++++++++++++++++++++ tests/CMakeLists.txt | 1 + tests/roaring64_unit.cpp | 946 ++++++++++++++++++++++++ 5 files changed, 2733 insertions(+) create mode 100644 include/roaring/roaring64.h create mode 100644 src/roaring64.c create mode 100644 tests/roaring64_unit.cpp diff --git a/include/roaring/roaring64.h b/include/roaring/roaring64.h new file mode 100644 index 000000000..3685fcbb9 --- /dev/null +++ b/include/roaring/roaring64.h @@ -0,0 +1,427 @@ +#ifndef ROARING64_H +#define ROARING64_H + +#include +#include +#include +#include +#include +#include + +// TODO: This is messy and can likely be improved. 
+#if defined(__cplusplus) +#define ROARING_ART_T ::roaring::internal::art_t +#define ROARING_ART_VAL_T ::roaring::internal::art_val_t +#else +#define ROARING_ART_T art_t +#define ROARING_ART_VAL_T art_val_t +#define ROARING_CONTAINER_T void +#endif + +#ifdef __cplusplus +extern "C" { +namespace roaring { +namespace api { +#endif + +typedef struct roaring64_bitmap_s { + ROARING_ART_T art; + uint8_t flags; +} roaring64_bitmap_t; + +// TODO: Ideally we don't put this in the header. +typedef struct leaf_s { + ROARING_ART_VAL_T _pad; + uint8_t typecode; + ROARING_CONTAINER_T *container; +} leaf_t; + +/** + * A bit of context usable with `roaring64_bitmap_*_bulk()` functions. + * + * Should be initialized with `{0}` (or `memset()` to all zeros). + * Callers should treat it as an opaque type. + * + * A context may only be used with a single bitmap (unless re-initialized to + * zero), and any modification to a bitmap (other than modifications performed + * with `_bulk()` functions with the context passed) will invalidate any + * contexts associated with that bitmap. + */ +typedef struct roaring64_bulk_context_s { + uint8_t high_bytes[ART_KEY_BYTES]; + uint16_t low_bytes; + leaf_t *leaf; +} roaring64_bulk_context_t; + +/** + * Dynamically allocates a new bitmap (initially empty). + * Client is responsible for calling `roaring64_bitmap_free()`. + */ +roaring64_bitmap_t *roaring64_bitmap_create(void); +void roaring64_bitmap_free(roaring64_bitmap_t *r); + +/** + * Returns a copy of a bitmap. + */ +roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r); + +/** + * Creates a new bitmap of a pointer to N 64-bit integers. + */ +roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args, + const uint64_t *vals); + +/** + * Creates a new bitmap of a pointer to N 64-bit integers. + */ +roaring64_bitmap_t *roaring64_bitmap_of(size_t n_args, ...); + +/** + * Create a new bitmap containing all the values in [min, max) that are at a + * distance k*step from min. + */ +roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max, + uint64_t step); + +/** + * Adds the provided value to the bitmap. + */ +void roaring64_bitmap_add(roaring64_bitmap_t *r, uint64_t val); + +/** + * Adds the provided value to the bitmap. + * Returns true if a new value was added, false if the value already existed. + */ +bool roaring64_bitmap_add_checked(roaring64_bitmap_t *r, uint64_t val); + +/** + * Add an item, using context from a previous insert for faster insertion. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `*context` should be zero-initialized before the first + * call to this function. + * + * Modifying the bitmap in any way (other than `-bulk` suffixed functions) + * will invalidate the stored context, calling this function with a non-zero + * context after doing any modification invokes undefined behavior. + * + * In order to exploit this optimization, the caller should call this function + * with values with the same high 48 bits of the value consecutively. + */ +void roaring64_bitmap_add_bulk(roaring64_bitmap_t *r, + roaring64_bulk_context_t *context, uint64_t val); + +/** + * Add `n_args` values from `vals`, faster than repeatedly calling + * `roaring64_bitmap_add()` + * + * In order to exploit this optimization, the caller should attempt to keep + * values with the same high 48 bits of the value as consecutive elements in + * `vals`. 
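+ *
+ * A minimal usage sketch (assuming `r` is an existing bitmap):
+ *
+ *   uint64_t vals[] = {1, 2, 3, (1ULL << 32) + 1, (1ULL << 32) + 2};
+ *   roaring64_bitmap_add_many(r, 5, vals);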
+ */ +void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args, + const uint64_t *vals); + +/** + * Add all values in range [min, max]. + */ +void roaring64_bitmap_add_range_closed(roaring64_bitmap_t *r, uint64_t min, + uint64_t max); + +/** + * Removes a value from the bitmap if present. + */ +void roaring64_bitmap_remove(roaring64_bitmap_t *r, uint64_t val); + +/** + * Removes a value from the bitmap if present, returns true if the value was + * removed and false if the value was not present. + */ +bool roaring64_bitmap_remove_checked(roaring64_bitmap_t *r, uint64_t val); + +/** + * Remove an item, using context from a previous insert for faster removal. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `*context` should be zero-initialized before the first + * call to this function. + * + * Modifying the bitmap in any way (other than `-bulk` suffixed functions) + * will invalidate the stored context, calling this function with a non-zero + * context after doing any modification invokes undefined behavior. + * + * In order to exploit this optimization, the caller should call this function + * with values with the same high 48 bits of the value consecutively. + */ +void roaring64_bitmap_remove_bulk(roaring64_bitmap_t *r, + roaring64_bulk_context_t *context, + uint64_t val); + +/** + * Remove `n_args` values from `vals`, faster than repeatedly calling + * `roaring64_bitmap_remove()` + * + * In order to exploit this optimization, the caller should attempt to keep + * values with the same high 48 bits of the value as consecutive elements in + * `vals`. + */ +void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args, + const uint64_t *vals); + +/** + * Remove all values in range [min, max]. + */ +void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min, + uint64_t max); + +/** + * Returns true if the provided value is present. + */ +bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val); + +/** + * Check if an item is present using context from a previous insert or search + * for faster search. + * + * `context` will be used to store information between calls to make bulk + * operations faster. `*context` should be zero-initialized before the first + * call to this function. + * + * Modifying the bitmap in any way (other than `-bulk` suffixed functions) + * will invalidate the stored context, calling this function with a non-zero + * context after doing any modification invokes undefined behavior. + * + * In order to exploit this optimization, the caller should call this function + * with values with the same high 48 bits of the value consecutively. + */ +bool roaring64_bitmap_contains_bulk(const roaring64_bitmap_t *r, + roaring64_bulk_context_t *context, + uint64_t val); + +/** + * Selects the element at index 'rank' where the smallest element is at index 0. + * If the size of the bitmap is strictly greater than rank, then this function + * returns true and sets element to the element of given rank. Otherwise, it + * returns false. + */ +bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank, + uint64_t *element); + +/** + * Returns the number of integers that are smaller or equal to x. Thus if x is + * the first element, this function will return 1. If x is smaller than the + * smallest element, this function will return 0. 
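+ * For example, if the bitmap contains {3, 7}: roaring64_bitmap_rank(r, 2) == 0,
+ * roaring64_bitmap_rank(r, 3) == 1, and roaring64_bitmap_rank(r, 7) == 2.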
+ * + * The indexing convention differs between roaring64_bitmap_select and + * roaring64_bitmap_rank: roaring_bitmap64_select refers to the smallest value + * as having index 0, whereas roaring64_bitmap_rank returns 1 when ranking + * the smallest value. + */ +uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val); + +/** + * Returns true if the given value is in the bitmap, and sets `out_index` to the + * (0-based) index of the value in the bitmap. Returns false if the value is not + * in the bitmap. + */ +bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, + uint64_t *out_index); + +/** + * Returns the number of values in the bitmap. + */ +uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r); + +/** + * Returns the number of elements in the range [min, max). + */ +uint64_t roaring64_bitmap_range_cardinality(const roaring64_bitmap_t *r, + uint64_t min, uint64_t max); + +/** + * Returns true if the bitmap is empty (cardinality is zero). + */ +bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r); + +/** + * Returns the smallest value in the set, or UINT64_MAX if the set is empty. + */ +uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r); + +/** + * Returns the largest value in the set, or 0 if empty. + */ +uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r); + +/** + * Returns true if the result has at least one run container. + */ +bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r); + +/** + * Returns the in-memory size of the bitmap. + * TODO: Return the serialized size. + */ +size_t roaring64_bitmap_size_in_bytes(const roaring64_bitmap_t *r); + +/** + * Return true if the two bitmaps contain the same elements. + */ +bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Return true if all the elements of r1 are also in r2. + */ +bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Return true if all the elements of r1 are also in r2, and r2 is strictly + * greater than r1. + */ +bool roaring64_bitmap_is_strict_subset(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Computes the intersection between two bitmaps and returns new bitmap. The + * caller is responsible for free-ing the result. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmaps. You may + * also rely on roaring64_bitmap_and_inplace to avoid creating many temporary + * bitmaps. + */ +roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Computes the size of the intersection between two bitmaps. + */ +uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * In-place version of `roaring64_bitmap_and()`, modifies `r1`. `r1` and `r2` + * are allowed to be equal. + * + * Performance hint: if you are computing the intersection between several + * bitmaps, two-by-two, it is best to start with the smallest bitmaps. + */ +void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Check whether two bitmaps intersect. + */ +bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Computes the Jaccard index between two bitmaps. 
(Also known as the Tanimoto + * distance, or the Jaccard similarity coefficient) + * + * The Jaccard index is undefined if both bitmaps are empty. + */ +double roaring64_bitmap_jaccard_index(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Computes the union between two bitmaps and returns new bitmap. The caller is + * responsible for free-ing the result. + */ +roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Computes the size of the union between two bitmaps. + */ +uint64_t roaring64_bitmap_or_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * In-place version of `roaring64_bitmap_or(), modifies `r1`. + */ +void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Computes the symmetric difference (xor) between two bitmaps and returns a new + * bitmap. The caller is responsible for free-ing the result. + */ +roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Computes the size of the symmetric difference (xor) between two bitmaps. + */ +uint64_t roaring64_bitmap_xor_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * In-place version of `roaring64_bitmap_xor()`, modifies `r1`. `r1` and `r2` + * are not allowed to be equal (that would result in an empty bitmap). + */ +void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Computes the difference (andnot) between two bitmaps and returns a new + * bitmap. The caller is responsible for free-ing the result. + */ +roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Computes the size of the difference (andnot) between two bitmaps. + */ +uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * In-place version of `roaring64_bitmap_andnot()`, modifies `r1`. `r1` and `r2` + * are not allowed to be equal (that would result in an empty bitmap). + */ +void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2); + +/** + * Iterate over the bitmap elements. The function `iterator` is called once for + * all the values with `ptr` (can be NULL) as the second parameter of each call. + * + * `roaring_iterator64` is simply a pointer to a function that returns a bool + * and takes `(uint64_t, void*)` as inputs. True means that the iteration should + * continue, while false means that it should stop. + * + * Returns true if the `roaring64_iterator` returned true throughout (so that + * all data points were necessarily visited). + * + * Iteration is ordered from the smallest to the largest elements. + */ +bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r, + roaring_iterator64 iterator, void *ptr); + +#ifdef __cplusplus +} // extern "C" +} // namespace roaring +} // namespace api +#endif + +#endif /* ROARING64_H */ + +#ifdef __cplusplus +/** + * Best practices for C++ headers is to avoid polluting global scope. + * But for C compatibility when just `roaring.h` is included building as + * C++, default to global access for the C public API. + * + * BUT when `roaring.hh` is included instead, it sets this flag. That way + * explicit namespacing must be used to get the C functions. 
+ * + * This is outside the include guard so that if you include BOTH headers, + * the order won't matter; you still get the global definitions. + */ +#if !defined(ROARING_API_NOT_IN_GLOBAL_NAMESPACE) +using namespace ::roaring::api; +#endif +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1f091eddf..4bd51dbdd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -34,6 +34,7 @@ set(ROARING_SRC containers/run.c memory.c roaring.c + roaring64.c roaring_priority_queue.c roaring_array.c) diff --git a/src/roaring64.c b/src/roaring64.c new file mode 100644 index 000000000..883ded9f9 --- /dev/null +++ b/src/roaring64.c @@ -0,0 +1,1358 @@ +#include +#include // TODO: this is only defined on GNU/Linux +#include +#include +#include +#include +#include + +#ifdef __cplusplus +using namespace ::roaring::internal; + +extern "C" { +namespace roaring { +namespace api64 { +#endif + +// TODO: Figure out how to keep art_t from being exposed in roaring64.h +// TODO: Iteration. +// * Need to create a container iterator which can be used across 32 and 64 bit +// bitmaps. +// * Iteration-based functions like roaring64_bitmap_intersect_with_range. +// TODO: Copy on write. +// TODO: Serialization. +// TODO: Error on failed allocation. + +// Splits the given uint64 key into high 48 bit and low 16 bit components. +// Expects high48_out to be of length 6. +static inline uint16_t split_key(uint64_t key, uint8_t high48_out[]) { + // Reverse byte order of the high 6 bytes. Not portable to big-endian + // systems! + uint64_t tmp = __bswap_64(key); + memcpy(high48_out, (uint8_t *)(&tmp), ART_KEY_BYTES); + return (uint16_t)key; +} + +// Recombines the high 48 bit and low 16 bit components into a uint64 key. +// Expects high48_out to be of length 6. +static inline uint64_t combine_key(const uint8_t high48[], uint16_t low16) { + uint64_t result = 0; + memcpy((uint8_t *)(&result), high48, ART_KEY_BYTES); + // Not portable to big-endian systems! + return __bswap_64(result) | low16; +} + +static inline uint64_t minimum(uint64_t a, uint64_t b) { + return (a < b) ? a : b; +} + +static inline leaf_t *create_leaf(container_t *container, uint8_t typecode) { + leaf_t *leaf = roaring_malloc(sizeof(leaf_t)); + leaf->container = container; + leaf->typecode = typecode; + return leaf; +} + +static inline leaf_t *copy_leaf_container(const leaf_t *leaf) { + leaf_t *result_leaf = roaring_malloc(sizeof(leaf_t)); + result_leaf->typecode = leaf->typecode; + // get_copy_of_container modifies the typecode passed in. 
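+    // Copy-on-write is not supported here yet (see the TODO list above), so
+    // we always request a full copy of the container.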
+ result_leaf->container = get_copy_of_container( + leaf->container, &result_leaf->typecode, /*copy_on_write=*/false); + return result_leaf; +} + +static inline void free_leaf(leaf_t *leaf) { roaring_free(leaf); } + +static inline int compare_high48(art_key_chunk_t key1[], + art_key_chunk_t key2[]) { + return art_compare_keys(key1, key2); +} + +roaring64_bitmap_t *roaring64_bitmap_create(void) { + roaring64_bitmap_t *r = roaring_malloc(sizeof(roaring64_bitmap_t)); + r->art.root = NULL; + r->flags = 0; + return r; +} + +void roaring64_bitmap_free(roaring64_bitmap_t *r) { + art_iterator_t it = art_create_iterator(); + art_init_iterator(&r->art, &it, /*first=*/true); + while (it.value != NULL) { + leaf_t *leaf = (leaf_t *)it.value; + container_free(leaf->container, leaf->typecode); + free_leaf(leaf); + art_iterator_next(&it); + } + art_free(&r->art); + roaring_free(r); +} + +roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r) { + roaring64_bitmap_t *result = roaring64_bitmap_create(); + + art_iterator_t it = art_create_iterator(); + art_init_iterator(&r->art, &it, /*first=*/true); + while (it.value != NULL) { + leaf_t *leaf = (leaf_t *)it.value; + uint8_t result_typecode = leaf->typecode; + container_t *result_container = get_copy_of_container( + leaf->container, &result_typecode, /*copy_on_write=*/false); + leaf_t *result_leaf = create_leaf(result_container, result_typecode); + art_insert(&result->art, it.key, (art_val_t *)result_leaf); + art_iterator_next(&it); + } + return result; +} + +roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max, + uint64_t step) { + if (step == 0 || max <= min) { + return NULL; + } + roaring64_bitmap_t *r = roaring64_bitmap_create(); + if (step >= (1 << 16)) { + // Only one value per container. + for (uint64_t value = min; value < max; value += step) { + roaring64_bitmap_add(r, value); + if (value > UINT64_MAX - step) { + break; + } + } + return r; + } + do { + uint64_t high_bits = min & 0xFFFFFFFFFFFF0000; + uint16_t container_min = min & 0xFFFF; + uint32_t container_max = (uint32_t)minimum(max - high_bits, 1 << 16); + + uint8_t typecode; + container_t *container = container_from_range( + &typecode, container_min, container_max, (uint16_t)step); + + uint8_t high48[ART_KEY_BYTES]; + split_key(min, high48); + leaf_t *leaf = create_leaf(container, typecode); + art_insert(&r->art, high48, (art_val_t *)leaf); + + uint64_t gap = container_max - container_min + step - 1; + uint64_t increment = gap - (gap % step); + if (min > UINT64_MAX - increment) { + break; + } + min += increment; + } while (min < max); + return r; +} + +roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args, + const uint64_t *vals) { + roaring64_bitmap_t *r = roaring64_bitmap_create(); + roaring64_bitmap_add_many(r, n_args, vals); + return r; +} + +roaring64_bitmap_t *roaring64_bitmap_of(size_t n_args, ...) 
{ + roaring64_bitmap_t *r = roaring64_bitmap_create(); + roaring64_bulk_context_t context = {0}; + va_list ap; + va_start(ap, n_args); + for (size_t i = 0; i < n_args; i++) { + uint64_t val = va_arg(ap, uint64_t); + roaring64_bitmap_add_bulk(r, &context, val); + } + va_end(ap); + return r; +} + +static inline leaf_t *containerptr_roaring64_bitmap_add(roaring64_bitmap_t *r, + uint8_t *high48, + uint16_t low16, + leaf_t *leaf) { + if (leaf != NULL) { + uint8_t typecode2; + container_t *container2 = + container_add(leaf->container, low16, leaf->typecode, &typecode2); + if (container2 != leaf->container) { + container_free(leaf->container, leaf->typecode); + leaf->container = container2; + leaf->typecode = typecode2; + } + return leaf; + } else { + array_container_t *ac = array_container_create(); + uint8_t typecode; + container_t *container = + container_add(ac, low16, ARRAY_CONTAINER_TYPE, &typecode); + assert(ac == container); + leaf = create_leaf(container, typecode); + art_insert(&r->art, high48, (art_val_t *)leaf); + return leaf; + } +} + +void roaring64_bitmap_add(roaring64_bitmap_t *r, uint64_t val) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); + containerptr_roaring64_bitmap_add(r, high48, low16, leaf); +} + +bool roaring64_bitmap_add_checked(roaring64_bitmap_t *r, uint64_t val) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); + + int old_cardinality = 0; + if (leaf != NULL) { + old_cardinality = + container_get_cardinality(leaf->container, leaf->typecode); + } + leaf = containerptr_roaring64_bitmap_add(r, high48, low16, leaf); + int new_cardinality = + container_get_cardinality(leaf->container, leaf->typecode); + return old_cardinality != new_cardinality; +} + +void roaring64_bitmap_add_bulk(roaring64_bitmap_t *r, + roaring64_bulk_context_t *context, + uint64_t val) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + if (context->leaf != NULL && + compare_high48(context->high_bytes, high48) == 0) { + // We're at a container with the correct high bits. + uint8_t typecode2; + container_t *container2 = + container_add(context->leaf->container, low16, + context->leaf->typecode, &typecode2); + if (container2 != context->leaf->container) { + container_free(context->leaf->container, context->leaf->typecode); + context->leaf->container = container2; + context->leaf->typecode = typecode2; + } + } else { + // We're not positioned anywhere yet or the high bits of the key + // differ. 
+        leaf_t *leaf = (leaf_t *)art_find(&r->art, high48);
+        context->leaf =
+            containerptr_roaring64_bitmap_add(r, high48, low16, leaf);
+        context->low_bytes = low16;
+        memcpy(context->high_bytes, high48, ART_KEY_BYTES);
+    }
+}
+
+void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args,
+                               const uint64_t *vals) {
+    if (n_args == 0) {
+        return;
+    }
+    const uint64_t *end = vals + n_args;
+    roaring64_bulk_context_t context = {0};
+    for (const uint64_t *current_val = vals; current_val != end;
+         current_val++) {
+        roaring64_bitmap_add_bulk(r, &context, *current_val);
+    }
+}
+
+static inline void add_range_closed_at(art_t *art, uint8_t *high48,
+                                       uint16_t min, uint16_t max) {
+    leaf_t *leaf = (leaf_t *)art_find(art, high48);
+    if (leaf != NULL) {
+        uint8_t typecode2;
+        container_t *container2 = container_add_range(
+            leaf->container, leaf->typecode, min, max + 1, &typecode2);
+        if (container2 != leaf->container) {
+            container_free(leaf->container, leaf->typecode);
+            leaf->container = container2;
+            leaf->typecode = typecode2;
+        }
+        return;
+    }
+    uint8_t typecode;
+    container_t *container = container_range_of_ones(min, max + 1, &typecode);
+    leaf = create_leaf(container, typecode);
+    art_insert(art, high48, (art_val_t *)leaf);
+}
+
+void roaring64_bitmap_add_range_closed(roaring64_bitmap_t *r, uint64_t min,
+                                       uint64_t max) {
+    if (min > max) {
+        return;
+    }
+
+    art_t *art = &r->art;
+    uint8_t min_high48[ART_KEY_BYTES];
+    uint16_t min_low16 = split_key(min, min_high48);
+    uint8_t max_high48[ART_KEY_BYTES];
+    uint16_t max_low16 = split_key(max, max_high48);
+    if (compare_high48(min_high48, max_high48) == 0) {
+        // Only populate range within one container.
+        add_range_closed_at(art, min_high48, min_low16, max_low16);
+        return;
+    }
+
+    // Populate a range across containers. Fill intermediate containers
+    // entirely.
+    add_range_closed_at(art, min_high48, min_low16, 0xffff);
+    uint64_t min_high_bits = min >> 16;
+    uint64_t max_high_bits = max >> 16;
+    for (uint64_t current = min_high_bits + 1; current < max_high_bits;
+         ++current) {
+        uint8_t current_high48[ART_KEY_BYTES];
+        split_key(current << 16, current_high48);
+        add_range_closed_at(art, current_high48, 0, 0xffff);
+    }
+    add_range_closed_at(art, max_high48, 0, max_low16);
+}
+
+bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val) {
+    uint8_t high48[ART_KEY_BYTES];
+    uint16_t low16 = split_key(val, high48);
+    leaf_t *leaf = (leaf_t *)art_find(&r->art, high48);
+    if (leaf != NULL) {
+        return container_contains(leaf->container, low16, leaf->typecode);
+    }
+    return false;
+}
+
+bool roaring64_bitmap_contains_bulk(const roaring64_bitmap_t *r,
+                                    roaring64_bulk_context_t *context,
+                                    uint64_t val) {
+    uint8_t high48[ART_KEY_BYTES];
+    uint16_t low16 = split_key(val, high48);
+
+    if (context->leaf == NULL ||
+        compare_high48(context->high_bytes, high48) != 0) {
+        // We're not positioned anywhere yet or the high bits of the key
+        // differ.
+ leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); + if (leaf == NULL) { + return false; + } + context->leaf = leaf; + memcpy(context->high_bytes, high48, ART_KEY_BYTES); + } + return container_contains(context->leaf->container, low16, + context->leaf->typecode); +} + +bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank, + uint64_t *element) { + art_iterator_t it = art_create_iterator(); + art_init_iterator(&r->art, &it, /*first=*/true); + uint64_t start_rank = 0; + while (it.value != NULL) { + leaf_t *leaf = (leaf_t *)it.value; + uint64_t cardinality = + container_get_cardinality(leaf->container, leaf->typecode); + if (start_rank + cardinality > rank) { + uint32_t uint32_start = 0; + uint32_t uint32_rank = rank - start_rank; + uint32_t uint32_element = 0; + if (container_select(leaf->container, leaf->typecode, &uint32_start, + uint32_rank, &uint32_element)) { + *element = combine_key(it.key, (uint16_t)uint32_element); + return true; + } + return false; + } + start_rank += cardinality; + art_iterator_next(&it); + } + return false; +} + +uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + + art_iterator_t it = art_create_iterator(); + art_init_iterator(&r->art, &it, /*first=*/true); + uint64_t rank = 0; + while (it.value != NULL) { + leaf_t *leaf = (leaf_t *)it.value; + int compare_result = compare_high48(it.key, high48); + if (compare_result < 0) { + rank += container_get_cardinality(leaf->container, leaf->typecode); + } else if (compare_result == 0) { + return rank + + container_rank(leaf->container, leaf->typecode, low16); + } else { + return rank; + } + art_iterator_next(&it); + } + return rank; +} + +bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, + uint64_t *out_index) { + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + + art_iterator_t it = art_create_iterator(); + art_init_iterator(&r->art, &it, /*first=*/true); + uint64_t index = 0; + while (it.value != NULL) { + leaf_t *leaf = (leaf_t *)it.value; + int compare_result = compare_high48(it.key, high48); + if (compare_result < 0) { + index += container_get_cardinality(leaf->container, leaf->typecode); + } else if (compare_result == 0) { + int index16 = + container_get_index(leaf->container, leaf->typecode, low16); + if (index16 < 0) { + return false; + } + *out_index = index + index16; + return true; + } else { + return false; + } + art_iterator_next(&it); + } + return false; +} + +static inline leaf_t *containerptr_roaring64_bitmap_remove( + roaring64_bitmap_t *r, uint8_t *high48, uint16_t low16, leaf_t *leaf) { + if (leaf == NULL) { + return NULL; + } + + container_t *container = leaf->container; + uint8_t typecode = leaf->typecode; + uint8_t typecode2; + container_t *container2 = + container_remove(container, low16, typecode, &typecode2); + if (container2 != container) { + container_free(container, typecode); + leaf->container = container2; + leaf->typecode = typecode2; + } + if (!container_nonzero_cardinality(container2, typecode2)) { + container_free(container2, typecode2); + leaf = (leaf_t *)art_erase(&r->art, high48); + if (leaf != NULL) { + free_leaf(leaf); + } + return NULL; + } + return leaf; +} + +void roaring64_bitmap_remove(roaring64_bitmap_t *r, uint64_t val) { + art_t *art = &r->art; + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + + leaf_t *leaf = (leaf_t *)art_find(art, high48); + 
containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); +} + +bool roaring64_bitmap_remove_checked(roaring64_bitmap_t *r, uint64_t val) { + art_t *art = &r->art; + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + leaf_t *leaf = (leaf_t *)art_find(art, high48); + + if (leaf == NULL) { + return false; + } + int old_cardinality = + container_get_cardinality(leaf->container, leaf->typecode); + leaf = containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); + if (leaf == NULL) { + return true; + } + int new_cardinality = + container_get_cardinality(leaf->container, leaf->typecode); + return new_cardinality != old_cardinality; +} + +void roaring64_bitmap_remove_bulk(roaring64_bitmap_t *r, + roaring64_bulk_context_t *context, + uint64_t val) { + art_t *art = &r->art; + uint8_t high48[ART_KEY_BYTES]; + uint16_t low16 = split_key(val, high48); + if (context->leaf != NULL && + compare_high48(context->high_bytes, high48) == 0) { + // We're at a container with the correct high bits. + uint8_t typecode2; + container_t *container2 = + container_remove(context->leaf->container, low16, + context->leaf->typecode, &typecode2); + if (container2 != context->leaf->container) { + container_free(context->leaf->container, context->leaf->typecode); + context->leaf->container = container2; + context->leaf->typecode = typecode2; + } + if (!container_nonzero_cardinality(container2, typecode2)) { + leaf_t *leaf = (leaf_t *)art_erase(art, high48); + container_free(container2, typecode2); + free_leaf(leaf); + } + } else { + // We're not positioned anywhere yet or the high bits of the key + // differ. + leaf_t *leaf = (leaf_t *)art_find(art, high48); + context->leaf = + containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); + context->low_bytes = low16; + memcpy(context->high_bytes, high48, ART_KEY_BYTES); + } +} + +void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args, + const uint64_t *vals) { + if (n_args == 0) { + return; + } + const uint64_t *end = vals + n_args; + roaring64_bulk_context_t context = {0}; + for (const uint64_t *current_val = vals; current_val != end; + current_val++) { + roaring64_bitmap_remove_bulk(r, &context, *current_val); + } +} + +static inline void remove_range_closed_at(art_t *art, uint8_t *high48, + uint16_t min, uint16_t max) { + leaf_t *leaf = (leaf_t *)art_find(art, high48); + if (leaf == NULL) { + return; + } + uint8_t typecode2; + // container_add_range is exclusive but container_remove_range is + // inclusive... + container_t *container2 = container_remove_range( + leaf->container, leaf->typecode, min, max, &typecode2); + if (container2 != leaf->container) { + container_free(leaf->container, leaf->typecode); + leaf->container = container2; + leaf->typecode = typecode2; + } + if (!container_nonzero_cardinality(container2, typecode2)) { + art_erase(art, high48); + container_free(container2, typecode2); + free_leaf(leaf); + } +} + +void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min, + uint64_t max) { + if (min > max) { + return; + } + + art_t *art = &r->art; + uint8_t min_high48[ART_KEY_BYTES]; + uint16_t min_low16 = split_key(min, min_high48); + uint8_t max_high48[ART_KEY_BYTES]; + uint16_t max_low16 = split_key(max, max_high48); + if (compare_high48(min_high48, max_high48) == 0) { + // Only remove a range within one container. + remove_range_closed_at(art, min_high48, min_low16, max_low16); + return; + } + + // Remove a range across containers. Remove intermediate containers + // entirely. 
+ remove_range_closed_at(art, min_high48, min_low16, 0xffff); + uint64_t min_high_bits = min >> 16; + uint64_t max_high_bits = max >> 16; + for (uint64_t current = min_high_bits + 1; current < max_high_bits; + ++current) { + uint8_t current_high48[ART_KEY_BYTES]; + split_key(current << 16, current_high48); + leaf_t *leaf = (leaf_t *)art_erase(art, current_high48); + if (leaf != NULL) { + container_free(leaf->container, leaf->typecode); + free_leaf(leaf); + } + } + remove_range_closed_at(art, max_high48, 0, max_low16); +} + +uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r) { + art_iterator_t it = art_create_iterator(); + art_init_iterator(&r->art, &it, /*first=*/true); + uint64_t cardinality = 0; + while (it.value != NULL) { + leaf_t *leaf = (leaf_t *)it.value; + cardinality += + container_get_cardinality(leaf->container, leaf->typecode); + art_iterator_next(&it); + } + return cardinality; +} + +uint64_t roaring64_bitmap_range_cardinality(const roaring64_bitmap_t *r, + uint64_t min, uint64_t max) { + max--; // A closed range is easier to work with. + + uint64_t cardinality = 0; + uint8_t min_high48[ART_KEY_BYTES]; + uint16_t min_low16 = split_key(min, min_high48); + uint8_t max_high48[ART_KEY_BYTES]; + uint16_t max_low16 = split_key(max, max_high48); + + art_iterator_t it = art_lower_bound(&r->art, min_high48); + while (it.value != NULL) { + int max_compare_result = compare_high48(it.key, max_high48); + if (max_compare_result > 0) { + // We're outside the range. + break; + } + + leaf_t *leaf = (leaf_t *)it.value; + if (max_compare_result == 0) { + // We're at the max high key, add only the range up to the low + // 16 bits of max. + cardinality += + container_rank(leaf->container, leaf->typecode, max_low16); + } else { + // We're not yet at the max high key, add the full container + // range. + cardinality += + container_get_cardinality(leaf->container, leaf->typecode); + } + if (compare_high48(it.key, min_high48) == 0 && min_low16 > 0) { + // We're at the min high key, remove the range up to the low 16 + // bits of min. + cardinality -= + container_rank(leaf->container, leaf->typecode, min_low16 - 1); + } + art_iterator_next(&it); + } + return cardinality; +} + +bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r) { + return art_is_empty(&r->art); +} + +uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r) { + art_iterator_t it = art_create_iterator(); + art_init_iterator(&r->art, &it, /*first=*/true); + if (it.value == NULL) { + return UINT64_MAX; + } + leaf_t *leaf = (leaf_t *)it.value; + return combine_key(it.key, + container_minimum(leaf->container, leaf->typecode)); +} + +uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r) { + art_iterator_t it = art_create_iterator(); + art_init_iterator(&r->art, &it, /*first=*/false); + if (it.value == NULL) { + return 0; + } + leaf_t *leaf = (leaf_t *)it.value; + return combine_key(it.key, + container_maximum(leaf->container, leaf->typecode)); +} + +bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) { + art_iterator_t it = art_create_iterator(); + art_init_iterator(&r->art, &it, /*first=*/true); + bool has_run_container = false; + while (it.value != NULL) { + leaf_t *leaf = (leaf_t *)it.value; + uint8_t new_typecode; + // We don't need to free the existing container if a new one was + // created, convert_run_optimize does that internally. 
+ leaf->container = convert_run_optimize(leaf->container, leaf->typecode, + &new_typecode); + leaf->typecode = new_typecode; + has_run_container |= new_typecode == RUN_CONTAINER_TYPE; + art_iterator_next(&it); + } + return has_run_container; +} + +size_t roaring64_bitmap_size_in_bytes(const roaring64_bitmap_t *r) { + size_t size = art_size_in_bytes(&r->art); + art_iterator_t it = art_create_iterator(); + art_init_iterator(&r->art, &it, /*first=*/true); + while (it.value != NULL) { + leaf_t *leaf = (leaf_t *)it.value; + size += sizeof(leaf_t); + size += container_size_in_bytes(leaf->container, leaf->typecode); + art_iterator_next(&it); + } + return size; +} + +bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + art_iterator_t it1 = art_create_iterator(); + art_init_iterator(&r1->art, &it1, /*first=*/true); + art_iterator_t it2 = art_create_iterator(); + art_init_iterator(&r2->art, &it2, /*first=*/true); + + while (it1.value != NULL && it2.value != NULL) { + if (compare_high48(it1.key, it2.key) != 0) { + return false; + } + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + if (!container_equals(leaf1->container, leaf1->typecode, + leaf2->container, leaf2->typecode)) { + return false; + } + art_iterator_next(&it1); + art_iterator_next(&it2); + } + return it1.value == NULL && it2.value == NULL; +} + +bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + art_iterator_t it1 = art_create_iterator(); + art_init_iterator(&r1->art, &it1, /*first=*/true); + art_iterator_t it2 = art_create_iterator(); + art_init_iterator(&r2->art, &it2, /*first=*/true); + + while (it1.value != NULL) { + bool it2_present = it2.value != NULL; + + int compare_result = 0; + if (it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + if (!container_is_subset(leaf1->container, leaf1->typecode, + leaf2->container, leaf2->typecode)) { + return false; + } + art_iterator_next(&it1); + art_iterator_next(&it2); + } + } + if (!it2_present || compare_result < 0) { + return false; + } else if (compare_result > 0) { + art_iterator_lower_bound(&it2, it1.key); + } + } + return true; +} + +bool roaring64_bitmap_is_strict_subset(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + return roaring64_bitmap_get_cardinality(r1) < + roaring64_bitmap_get_cardinality(r2) && + roaring64_bitmap_is_subset(r1, r2); +} + +roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + roaring64_bitmap_t *result = roaring64_bitmap_create(); + + art_iterator_t it1 = art_create_iterator(); + art_init_iterator(&r1->art, &it1, /*first=*/true); + art_iterator_t it2 = art_create_iterator(); + art_init_iterator(&r2->art, &it2, /*first=*/true); + + while (it1.value != NULL && it2.value != NULL) { + // Cases: + // 1. it1 < it2 -> it1++ + // 2. it1 == it1 -> output it1 & it2, it1++, it2++ + // 3. it1 > it2 -> it2++ + int compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2: iterators at the same high key position. 
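+            // Intersect the two containers; only a non-empty result is
+            // inserted into the output ART.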
+ leaf_t *result_leaf = roaring_malloc(sizeof(leaf_t)); + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + result_leaf->container = container_and( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &result_leaf->typecode); + + if (container_nonzero_cardinality(result_leaf->container, + result_leaf->typecode)) { + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + } else { + container_free(result_leaf->container, result_leaf->typecode); + free_leaf(result_leaf); + } + art_iterator_next(&it1); + art_iterator_next(&it2); + } else if (compare_result < 0) { + // Case 1: it1 is before it2. + art_iterator_lower_bound(&it1, it2.key); + } else { + // Case 3: it2 is before it1. + art_iterator_lower_bound(&it2, it1.key); + } + } + return result; +} + +uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + uint64_t result = 0; + + art_iterator_t it1 = art_create_iterator(); + art_init_iterator(&r1->art, &it1, /*first=*/true); + art_iterator_t it2 = art_create_iterator(); + art_init_iterator(&r2->art, &it2, /*first=*/true); + + while (it1.value != NULL && it2.value != NULL) { + // Cases: + // 1. it1 < it2 -> it1++ + // 2. it1 == it1 -> output cardinaltiy it1 & it2, it1++, it2++ + // 3. it1 > it2 -> it2++ + int compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2: iterators at the same high key position. + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + result += + container_and_cardinality(leaf1->container, leaf1->typecode, + leaf2->container, leaf2->typecode); + art_iterator_next(&it1); + art_iterator_next(&it2); + } else if (compare_result < 0) { + // Case 1: it1 is before it2. + art_iterator_lower_bound(&it1, it2.key); + } else { + // Case 3: it2 is before it1. + art_iterator_lower_bound(&it2, it1.key); + } + } + return result; +} + +// Inplace and (modifies its first argument). +void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + if (r1 == r2) { + return; + } + art_iterator_t it1 = art_create_iterator(); + art_init_iterator(&r1->art, &it1, /*first=*/true); + art_iterator_t it2 = art_create_iterator(); + art_init_iterator(&r2->art, &it2, /*first=*/true); + + while (it1.value != NULL) { + // Cases: + // 1. !it2_present -> erase it1 + // 2. it2_present + // a. it1 < it2 -> erase it1 + // b. it1 == it2 -> output it1 & it2, it1++, it2++ + // c. it1 > it2 -> it2++ + bool it2_present = it2.value != NULL; + int compare_result = 0; + if (it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2a: iterators at the same high key position. + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + + // We do the computation "in place" only when c1 is not a + // shared container. Rationale: using a shared container + // safely with in place computation would require making a + // copy and then doing the computation in place which is + // likely less efficient than avoiding in place entirely and + // always generating a new container. 
+ uint8_t typecode2; + container_t *container2; + if (leaf1->typecode == SHARED_CONTAINER_TYPE) { + container2 = container_and( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); + } else { + container2 = container_iand( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); + } + + if (container2 != leaf1->container) { + container_free(leaf1->container, leaf1->typecode); + leaf1->container = container2; + leaf1->typecode = typecode2; + } + if (!container_nonzero_cardinality(container2, typecode2)) { + container_free(container2, typecode2); + art_iterator_erase(&r1->art, &it1); + free_leaf(leaf1); + } else { + // Only advance the iterator if we didn't delete the + // leaf, as erasing advances by itself. + art_iterator_next(&it1); + } + art_iterator_next(&it2); + } + } + + if (!it2_present || compare_result < 0) { + // Cases 1 and 3a: it1 is the only iterator or is before it2. + leaf_t *leaf = (leaf_t *)art_iterator_erase(&r1->art, &it1); + assert(leaf != NULL); + container_free(leaf->container, leaf->typecode); + free_leaf(leaf); + } else if (compare_result > 0) { + // Case 2c: it1 is after it2. + art_iterator_lower_bound(&it2, it1.key); + } + } +} + +bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + bool intersect = false; + art_iterator_t it1 = art_create_iterator(); + art_init_iterator(&r1->art, &it1, /*first=*/true); + art_iterator_t it2 = art_create_iterator(); + art_init_iterator(&r2->art, &it2, /*first=*/true); + + while (it1.value != NULL && it2.value != NULL) { + // Cases: + // 1. it1 < it2 -> it1++ + // 2. it1 == it1 -> intersect |= it1 & it2, it1++, it2++ + // 3. it1 > it2 -> it2++ + int compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2: iterators at the same high key position. + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + intersect |= container_intersect(leaf1->container, leaf1->typecode, + leaf2->container, leaf2->typecode); + art_iterator_next(&it1); + art_iterator_next(&it2); + } else if (compare_result < 0) { + // Case 1: it1 is before it2. + art_iterator_lower_bound(&it1, it2.key); + } else { + // Case 3: it2 is before it1. + art_iterator_lower_bound(&it2, it1.key); + } + } + return intersect; +} + +double roaring64_bitmap_jaccard_index(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + uint64_t c1 = roaring64_bitmap_get_cardinality(r1); + uint64_t c2 = roaring64_bitmap_get_cardinality(r2); + uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); + return (double)inter / (double)(c1 + c2 - inter); +} + +roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + roaring64_bitmap_t *result = roaring64_bitmap_create(); + + art_iterator_t it1 = art_create_iterator(); + art_init_iterator(&r1->art, &it1, /*first=*/true); + art_iterator_t it2 = art_create_iterator(); + art_init_iterator(&r2->art, &it2, /*first=*/true); + + while (it1.value != NULL || it2.value != NULL) { + bool it1_present = it1.value != NULL; + bool it2_present = it2.value != NULL; + + // Cases: + // 1. it1_present && !it2_present -> output it1, it1++ + // 2. !it1_present && it2_present -> output it2, it2++ + // 3. it1_present && it2_present + // a. it1 < it2 -> output it1, it1++ + // b. it1 == it2 -> output it1 | it2, it1++, it2++ + // c. 
it1 > it2 -> output it2, it2++ + int compare_result = 0; + if (it1_present && it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 3b: iterators at the same high key position. + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + leaf_t *result_leaf = roaring_malloc(sizeof(leaf_t)); + result_leaf->container = container_or( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &result_leaf->typecode); + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + art_iterator_next(&it1); + art_iterator_next(&it2); + } + } + if ((it1_present && !it2_present) || compare_result < 0) { + // Cases 1 and 3a: it1 is the only iterator or is before it2. + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + art_iterator_next(&it1); + } else if ((!it1_present && it2_present) || compare_result > 0) { + // Cases 2 and 3c: it2 is the only iterator or is before it1. + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); + art_insert(&result->art, it2.key, (art_val_t *)result_leaf); + art_iterator_next(&it2); + } + } + return result; +} + +uint64_t roaring64_bitmap_or_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + uint64_t c1 = roaring64_bitmap_get_cardinality(r1); + uint64_t c2 = roaring64_bitmap_get_cardinality(r2); + uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); + return c1 + c2 - inter; +} + +void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + if (r1 == r2) { + return; + } + art_iterator_t it1 = art_create_iterator(); + art_init_iterator(&r1->art, &it1, /*first=*/true); + art_iterator_t it2 = art_create_iterator(); + art_init_iterator(&r2->art, &it2, /*first=*/true); + + while (it1.value != NULL || it2.value != NULL) { + bool it1_present = it1.value != NULL; + bool it2_present = it2.value != NULL; + + // Cases: + // 1. it1_present && !it2_present -> it1++ + // 2. !it1_present && it2_present -> add it2, it2++ + // 3. it1_present && it2_present + // a. it1 < it2 -> it1++ + // b. it1 == it2 -> it1 | it2, it1++, it2++ + // c. it1 > it2 -> add it2, it2++ + int compare_result = 0; + if (it1_present && it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 3b: iterators at the same high key position. + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + uint8_t typecode2; + container_t *container2; + if (leaf1->typecode == SHARED_CONTAINER_TYPE) { + container2 = container_or(leaf1->container, leaf1->typecode, + leaf2->container, leaf2->typecode, + &typecode2); + } else { + container2 = container_ior( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); + } + if (container2 != leaf1->container) { + container_free(leaf1->container, leaf1->typecode); + leaf1->container = container2; + leaf1->typecode = typecode2; + } + art_iterator_next(&it1); + art_iterator_next(&it2); + } + } + if ((it1_present && !it2_present) || compare_result < 0) { + // Cases 1 and 3a: it1 is the only iterator or is before it2. + art_iterator_next(&it1); + } else if ((!it1_present && it2_present) || compare_result > 0) { + // Cases 2 and 3c: it2 is the only iterator or is before it1. 
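+            // r1 has no container at this high key: copy r2's container and
+            // insert it, which also positions it1 at the inserted key.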
+ leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); + art_iterator_insert(&r1->art, &it1, it2.key, + (art_val_t *)result_leaf); + art_iterator_next(&it2); + } + } +} + +roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + roaring64_bitmap_t *result = roaring64_bitmap_create(); + + art_iterator_t it1 = art_create_iterator(); + art_init_iterator(&r1->art, &it1, /*first=*/true); + art_iterator_t it2 = art_create_iterator(); + art_init_iterator(&r2->art, &it2, /*first=*/true); + + while (it1.value != NULL || it2.value != NULL) { + bool it1_present = it1.value != NULL; + bool it2_present = it2.value != NULL; + + // Cases: + // 1. it1_present && !it2_present -> output it1, it1++ + // 2. !it1_present && it2_present -> output it2, it2++ + // 3. it1_present && it2_present + // a. it1 < it2 -> output it1, it1++ + // b. it1 == it2 -> output it1 ^ it2, it1++, it2++ + // c. it1 > it2 -> output it2, it2++ + int compare_result = 0; + if (it1_present && it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 3b: iterators at the same high key position. + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + leaf_t *result_leaf = roaring_malloc(sizeof(leaf_t)); + result_leaf->container = container_xor( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &result_leaf->typecode); + if (container_nonzero_cardinality(result_leaf->container, + result_leaf->typecode)) { + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + } else { + container_free(result_leaf->container, + result_leaf->typecode); + free_leaf(result_leaf); + } + art_iterator_next(&it1); + art_iterator_next(&it2); + } + } + if ((it1_present && !it2_present) || compare_result < 0) { + // Cases 1 and 3a: it1 is the only iterator or is before it2. + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + art_iterator_next(&it1); + } else if ((!it1_present && it2_present) || compare_result > 0) { + // Cases 2 and 3c: it2 is the only iterator or is before it1. + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); + art_insert(&result->art, it2.key, (art_val_t *)result_leaf); + art_iterator_next(&it2); + } + } + return result; +} + +uint64_t roaring64_bitmap_xor_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + uint64_t c1 = roaring64_bitmap_get_cardinality(r1); + uint64_t c2 = roaring64_bitmap_get_cardinality(r2); + uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); + return c1 + c2 - 2 * inter; +} + +void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + assert(r1 != r2); + art_iterator_t it1 = art_create_iterator(); + art_init_iterator(&r1->art, &it1, /*first=*/true); + art_iterator_t it2 = art_create_iterator(); + art_init_iterator(&r2->art, &it2, /*first=*/true); + + while (it1.value != NULL || it2.value != NULL) { + bool it1_present = it1.value != NULL; + bool it2_present = it2.value != NULL; + + // Cases: + // 1. it1_present && !it2_present -> it1++ + // 2. !it1_present && it2_present -> add it2, it2++ + // 3. it1_present && it2_present + // a. it1 < it2 -> it1++ + // b. it1 == it2 -> it1 ^ it2, it1++, it2++ + // c. 
it1 > it2 -> add it2, it2++ + int compare_result = 0; + if (it1_present && it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 3b: iterators at the same high key position. + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + container_t *container1 = leaf1->container; + uint8_t typecode1 = leaf1->typecode; + uint8_t typecode2; + container_t *container2; + if (leaf1->typecode == SHARED_CONTAINER_TYPE) { + container2 = container_xor( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); + if (container2 != container1) { + // We only free when doing container_xor, not + // container_ixor, as ixor frees the original + // internally. + container_free(container1, typecode1); + } + } else { + container2 = container_ixor( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); + } + leaf1->container = container2; + leaf1->typecode = typecode2; + + if (!container_nonzero_cardinality(container2, typecode2)) { + container_free(container2, typecode2); + art_iterator_erase(&r1->art, &it1); + free_leaf(leaf1); + } else { + // Only advance the iterator if we didn't delete the + // leaf, as erasing advances by itself. + art_iterator_next(&it1); + } + art_iterator_next(&it2); + } + } + if ((it1_present && !it2_present) || compare_result < 0) { + // Cases 1 and 3a: it1 is the only iterator or is before it2. + art_iterator_next(&it1); + } else if ((!it1_present && it2_present) || compare_result > 0) { + // Cases 2 and 3c: it2 is the only iterator or is before it1. + leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); + if (it1_present) { + art_iterator_insert(&r1->art, &it1, it2.key, + (art_val_t *)result_leaf); + art_iterator_next(&it1); + } else { + art_insert(&r1->art, it2.key, (art_val_t *)result_leaf); + } + art_iterator_next(&it2); + } + } +} + +roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + roaring64_bitmap_t *result = roaring64_bitmap_create(); + + art_iterator_t it1 = art_create_iterator(); + art_init_iterator(&r1->art, &it1, /*first=*/true); + art_iterator_t it2 = art_create_iterator(); + art_init_iterator(&r2->art, &it2, /*first=*/true); + + while (it1.value != NULL) { + // Cases: + // 1. it1_present && !it2_present -> output it1, it1++ + // 2. it1_present && it2_present + // a. it1 < it2 -> output it1, it1++ + // b. it1 == it2 -> output it1 - it2, it1++, it2++ + // c. it1 > it2 -> it2++ + bool it2_present = it2.value != NULL; + int compare_result = 0; + if (it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2b: iterators at the same high key position. + leaf_t *result_leaf = roaring_malloc(sizeof(leaf_t)); + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + result_leaf->container = container_andnot( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &result_leaf->typecode); + + if (container_nonzero_cardinality(result_leaf->container, + result_leaf->typecode)) { + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + } else { + container_free(result_leaf->container, + result_leaf->typecode); + free_leaf(result_leaf); + } + art_iterator_next(&it1); + art_iterator_next(&it2); + } + } + if (!it2_present || compare_result < 0) { + // Cases 1 and 2a: it1 is the only iterator or is before it2. 
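+            // Nothing in r2 matches this high key, so r1's entry carries
+            // over into the difference unchanged.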
+ leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); + art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + art_iterator_next(&it1); + } else if (compare_result > 0) { + // Case 2c: it1 is after it2. + art_iterator_next(&it2); + } + } + return result; +} + +uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + uint64_t c1 = roaring64_bitmap_get_cardinality(r1); + uint64_t inter = roaring64_bitmap_and_cardinality(r1, r2); + return c1 - inter; +} + +void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, + const roaring64_bitmap_t *r2) { + art_iterator_t it1 = art_create_iterator(); + art_init_iterator(&r1->art, &it1, /*first=*/true); + art_iterator_t it2 = art_create_iterator(); + art_init_iterator(&r2->art, &it2, /*first=*/true); + + while (it1.value != NULL) { + // Cases: + // 1. it1_present && !it2_present -> it1++ + // 2. it1_present && it2_present + // a. it1 < it2 -> it1++ + // b. it1 == it2 -> it1 - it2, it1++, it2++ + // c. it1 > it2 -> it2++ + bool it2_present = it2.value != NULL; + int compare_result = 0; + if (it2_present) { + compare_result = compare_high48(it1.key, it2.key); + if (compare_result == 0) { + // Case 2b: iterators at the same high key position. + leaf_t *leaf1 = (leaf_t *)it1.value; + leaf_t *leaf2 = (leaf_t *)it2.value; + container_t *container1 = leaf1->container; + uint8_t typecode1 = leaf1->typecode; + uint8_t typecode2; + container_t *container2; + if (leaf1->typecode == SHARED_CONTAINER_TYPE) { + container2 = container_andnot( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); + } else { + container2 = container_iandnot( + leaf1->container, leaf1->typecode, leaf2->container, + leaf2->typecode, &typecode2); + } + if (container2 != container1) { + container_free(container1, typecode1); + leaf1->container = container2; + leaf1->typecode = typecode2; + } + + if (!container_nonzero_cardinality(container2, typecode2)) { + container_free(container2, typecode2); + art_iterator_erase(&r1->art, &it1); + free_leaf(leaf1); + } else { + // Only advance the iterator if we didn't delete the + // leaf, as erasing advances by itself. + art_iterator_next(&it1); + } + art_iterator_next(&it2); + } + } + if (!it2_present || compare_result < 0) { + // Cases 1 and 2a: it1 is the only iterator or is before it2. + art_iterator_next(&it1); + } else if (compare_result > 0) { + // Case 2c: it1 is after it2. 
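+            // A key present only in r2 cannot remove anything from r1, so
+            // it2 is simply skipped ahead.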
+ art_iterator_next(&it2); + } + } +} + +bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r, + roaring_iterator64 iterator, void *ptr) { + art_iterator_t it = art_create_iterator(); + art_init_iterator(&r->art, &it, /*first=*/true); + while (it.value != NULL) { + uint64_t high48 = combine_key(it.key, 0); + uint64_t high32 = high48 & 0xFFFFFFFF00000000; + uint32_t low32 = high48; + leaf_t *leaf = (leaf_t *)it.value; + if (!container_iterate64(leaf->container, leaf->typecode, low32, + iterator, high32, ptr)) { + return false; + } + art_iterator_next(&it); + } + return true; +} + +#ifdef __cplusplus +} // extern "C" +} // namespace roaring +} // namespace api +#endif diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 673a7bd7f..69ded4048 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -21,6 +21,7 @@ add_c_test(robust_deserialization_unit) add_c_test(container_comparison_unit) add_c_test(add_offset) add_cpp_test(art_unit) +add_cpp_test(roaring64_unit) find_package(Threads) if(Threads_FOUND) message(STATUS "Your system supports threads.") diff --git a/tests/roaring64_unit.cpp b/tests/roaring64_unit.cpp new file mode 100644 index 000000000..868b2df33 --- /dev/null +++ b/tests/roaring64_unit.cpp @@ -0,0 +1,946 @@ +#include + +#include +#include +#include +#include +#include + +#include "test.h" + +using namespace roaring::api; + +namespace { + +DEFINE_TEST(test_copy) { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + + roaring64_bitmap_add(r1, 0); + roaring64_bitmap_add(r1, 10000); + roaring64_bitmap_add(r1, 200000); + + roaring64_bitmap_t* r2 = roaring64_bitmap_copy(r1); + assert_true(roaring64_bitmap_contains(r2, 0)); + assert_true(roaring64_bitmap_contains(r2, 10000)); + assert_true(roaring64_bitmap_contains(r2, 200000)); + + roaring64_bitmap_remove(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + assert_true(roaring64_bitmap_contains(r2, 200000)); + assert_false(roaring64_bitmap_contains(r2, 300000)); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); +} + +DEFINE_TEST(test_from_range) { + { + // Step greater than 2 ^ 16. + roaring64_bitmap_t* r = roaring64_bitmap_from_range(0, 1000000, 200000); + assert_true(roaring64_bitmap_contains(r, 0)); + assert_true(roaring64_bitmap_contains(r, 200000)); + assert_true(roaring64_bitmap_contains(r, 400000)); + assert_true(roaring64_bitmap_contains(r, 600000)); + assert_true(roaring64_bitmap_contains(r, 800000)); + assert_false(roaring64_bitmap_contains(r, 1000000)); + roaring64_bitmap_free(r); + } + { + // Step less than 2 ^ 16 and within one container. + roaring64_bitmap_t* r = roaring64_bitmap_from_range(0, 100, 20); + assert_true(roaring64_bitmap_contains(r, 0)); + assert_true(roaring64_bitmap_contains(r, 20)); + assert_true(roaring64_bitmap_contains(r, 40)); + assert_true(roaring64_bitmap_contains(r, 60)); + assert_true(roaring64_bitmap_contains(r, 80)); + assert_false(roaring64_bitmap_contains(r, 100)); + roaring64_bitmap_free(r); + } + { + // Step less than 2 ^ 16 and across two containers. + roaring64_bitmap_t* r = + roaring64_bitmap_from_range((1 << 16) - 1, (1 << 16) + 5, 2); + assert_true(roaring64_bitmap_contains(r, (1 << 16) - 1)); + assert_true(roaring64_bitmap_contains(r, (1 << 16) + 1)); + assert_true(roaring64_bitmap_contains(r, (1 << 16) + 3)); + assert_false(roaring64_bitmap_contains(r, (1 << 16) + 5)); + roaring64_bitmap_free(r); + } + { + // Step less than 2 ^ 16 and across multiple containers. 
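+        // Covers 65535 through 131073, spanning three consecutive containers.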
+ roaring64_bitmap_t* r = + roaring64_bitmap_from_range((1 << 16) - 1, (1 << 17) + 2, 1); + assert_true(roaring64_bitmap_contains(r, (1 << 16) - 1)); + assert_true(roaring64_bitmap_contains(r, (1 << 16) + 0)); + assert_true(roaring64_bitmap_contains(r, (1 << 16) + 1)); + assert_true(roaring64_bitmap_contains(r, (1 << 17) - 1)); + assert_true(roaring64_bitmap_contains(r, (1 << 17) + 0)); + assert_true(roaring64_bitmap_contains(r, (1 << 17) + 1)); + assert_false(roaring64_bitmap_contains(r, (1 << 17) + 2)); + roaring64_bitmap_free(r); + } +} + +DEFINE_TEST(test_of_ptr) { + std::array vals; + std::iota(vals.begin(), vals.end(), 0); + roaring64_bitmap_t* r = roaring64_bitmap_of_ptr(vals.size(), vals.data()); + for (uint64_t i = 0; i < 1000; ++i) { + assert_true(roaring64_bitmap_contains(r, vals[i])); + } + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_of) { + roaring64_bitmap_t* r = roaring64_bitmap_of(3, 1, 20000, 500000); + assert_true(roaring64_bitmap_contains(r, 1)); + assert_true(roaring64_bitmap_contains(r, 20000)); + assert_true(roaring64_bitmap_contains(r, 500000)); + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_add) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + + roaring64_bitmap_add(r, 0); + roaring64_bitmap_add(r, 10000); + roaring64_bitmap_add(r, 200000); + + assert_true(roaring64_bitmap_contains(r, 0)); + assert_true(roaring64_bitmap_contains(r, 10000)); + assert_true(roaring64_bitmap_contains(r, 200000)); + + assert_false(roaring64_bitmap_contains(r, 1)); + + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_add_checked) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + + assert_true(roaring64_bitmap_add_checked(r, 0)); + assert_false(roaring64_bitmap_add_checked(r, 0)); + assert_true(roaring64_bitmap_add_checked(r, 10000)); + assert_false(roaring64_bitmap_add_checked(r, 10000)); + assert_true(roaring64_bitmap_add_checked(r, 200000)); + assert_false(roaring64_bitmap_add_checked(r, 200000)); + + assert_true(roaring64_bitmap_contains(r, 0)); + assert_true(roaring64_bitmap_contains(r, 10000)); + assert_true(roaring64_bitmap_contains(r, 200000)); + + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_add_bulk) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + + roaring64_bulk_context_t context{}; + for (uint64_t i = 0; i < 10000; ++i) { + roaring64_bitmap_add_bulk(r, &context, i * 10000); + } + for (uint64_t i = 0; i < 10000; ++i) { + assert_true(roaring64_bitmap_contains(r, i * 10000)); + } + + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_add_many) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + std::array vals; + std::iota(vals.begin(), vals.end(), 0); + + roaring64_bitmap_add_many(r, vals.size(), vals.data()); + for (uint64_t i = 0; i < 1000; ++i) { + assert_true(roaring64_bitmap_contains(r, vals[i])); + } + + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_add_range_closed) { + { + // Entire range within one container. + roaring64_bitmap_t* r = roaring64_bitmap_create(); + roaring64_bitmap_add_range_closed(r, 10, 20); + roaring64_bulk_context_t context{}; + assert_false(roaring64_bitmap_contains_bulk(r, &context, 9)); + for (uint64_t i = 10; i <= 20; ++i) { + assert_true(roaring64_bitmap_contains_bulk(r, &context, i)); + } + assert_false(roaring64_bitmap_contains_bulk(r, &context, 21)); + roaring64_bitmap_free(r); + } + { + // Range spans two containers. 
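+        // 1 << 16 is the first value of the second container, so this closed
+        // range straddles the boundary between the first two containers.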
+ roaring64_bitmap_t* r = roaring64_bitmap_create(); + roaring64_bitmap_add_range_closed(r, (1 << 16) - 10, (1 << 16) + 10); + roaring64_bulk_context_t context{}; + assert_false( + roaring64_bitmap_contains_bulk(r, &context, (1 << 16) - 11)); + for (uint64_t i = (1 << 16) - 10; i <= (1 << 16) + 10; ++i) { + assert_true(roaring64_bitmap_contains_bulk(r, &context, i)); + } + assert_false( + roaring64_bitmap_contains_bulk(r, &context, (1 << 16) + 11)); + roaring64_bitmap_free(r); + } + { + // Range spans more than two containers. + roaring64_bitmap_t* r = roaring64_bitmap_create(); + roaring64_bitmap_add_range_closed(r, 100, 300000); + roaring64_bulk_context_t context{}; + assert_false(roaring64_bitmap_contains_bulk(r, &context, 99)); + for (uint64_t i = 100; i <= 300000; ++i) { + assert_true(roaring64_bitmap_contains_bulk(r, &context, i)); + } + assert_false(roaring64_bitmap_contains_bulk(r, &context, 300001)); + roaring64_bitmap_free(r); + } +} + +DEFINE_TEST(test_contains_bulk) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + roaring64_bulk_context_t context{}; + for (uint64_t i = 0; i < 10000; ++i) { + roaring64_bitmap_add_bulk(r, &context, i * 1000); + } + context = {}; + for (uint64_t i = 0; i < 10000; ++i) { + assert_true(roaring64_bitmap_contains_bulk(r, &context, i * 1000)); + } + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_select) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + for (uint64_t i = 0; i < 100; ++i) { + roaring64_bitmap_add(r, i * 1000); + } + uint64_t element = 0; + for (uint64_t i = 0; i < 100; ++i) { + assert_true(roaring64_bitmap_select(r, i, &element)); + assert_true(element == i * 1000); + } + assert_false(roaring64_bitmap_select(r, 100, &element)); + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_rank) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + for (uint64_t i = 0; i < 100; ++i) { + roaring64_bitmap_add(r, i * 1000); + } + for (uint64_t i = 0; i < 100; ++i) { + assert_true(roaring64_bitmap_rank(r, i * 1000) == i + 1); + assert_true(roaring64_bitmap_rank(r, i * 1000 + 1) == i + 1); + } + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_get_index) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + for (uint64_t i = 0; i < 100; ++i) { + roaring64_bitmap_add(r, i * 1000); + } + for (uint64_t i = 0; i < 100; ++i) { + uint64_t index = 0; + assert_true(roaring64_bitmap_get_index(r, i * 1000, &index)); + assert_true(index == i); + assert_false(roaring64_bitmap_get_index(r, i * 1000 + 1, &index)); + } + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_remove) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + for (uint64_t i = 0; i < 100; ++i) { + roaring64_bitmap_add(r, i * 10000); + } + for (uint64_t i = 0; i < 100; ++i) { + assert_true(roaring64_bitmap_contains(r, i * 10000)); + } + for (uint64_t i = 0; i < 100; ++i) { + roaring64_bitmap_remove(r, i * 10000); + } + for (uint64_t i = 0; i < 100; ++i) { + assert_false(roaring64_bitmap_contains(r, i * 10000)); + } + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_remove_checked) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + for (uint64_t i = 0; i < 100; ++i) { + roaring64_bitmap_add(r, i * 10000); + } + for (uint64_t i = 0; i < 100; ++i) { + assert_true(roaring64_bitmap_remove_checked(r, i * 10000)); + assert_false(roaring64_bitmap_remove_checked(r, i * 10000)); + } + for (uint64_t i = 0; i < 100; ++i) { + assert_false(roaring64_bitmap_contains(r, i * 10000)); + } + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_remove_bulk) { + roaring64_bitmap_t* r = 
roaring64_bitmap_create(); + roaring64_bulk_context_t context{}; + for (uint64_t i = 0; i < 10000; ++i) { + roaring64_bitmap_add_bulk(r, &context, i * 1000); + } + context = {}; + for (uint64_t i = 1; i < 9999; ++i) { + roaring64_bitmap_remove_bulk(r, &context, i * 1000); + } + context = {}; + assert_true(roaring64_bitmap_contains_bulk(r, &context, 0)); + for (uint64_t i = 1; i < 9999; ++i) { + assert_false(roaring64_bitmap_contains_bulk(r, &context, i * 1000)); + } + assert_true(roaring64_bitmap_contains_bulk(r, &context, 9999000)); + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_remove_many) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + std::array vals; + std::iota(vals.begin(), vals.end(), 0); + + roaring64_bitmap_add_many(r, vals.size(), vals.data()); + roaring64_bitmap_remove_many(r, vals.size(), vals.data()); + for (uint64_t i = 0; i < 1000; ++i) { + assert_false(roaring64_bitmap_contains(r, vals[i])); + } + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_remove_range_closed) { + { + // Entire range within one container. + roaring64_bitmap_t* r = roaring64_bitmap_create(); + roaring64_bitmap_add_range_closed(r, 10, 20); + roaring64_bitmap_remove_range_closed(r, 11, 21); + roaring64_bulk_context_t context{}; + assert_true(roaring64_bitmap_contains_bulk(r, &context, 10)); + for (uint64_t i = 11; i <= 21; ++i) { + assert_false(roaring64_bitmap_contains_bulk(r, &context, i)); + } + roaring64_bitmap_free(r); + } + { + // Range spans two containers. + roaring64_bitmap_t* r = roaring64_bitmap_create(); + roaring64_bitmap_add_range_closed(r, (1 << 16) - 10, (1 << 16) + 10); + roaring64_bitmap_remove_range_closed(r, (1 << 16) - 9, (1 << 16) + 9); + roaring64_bulk_context_t context{}; + assert_true( + roaring64_bitmap_contains_bulk(r, &context, (1 << 16) - 10)); + for (uint64_t i = (1 << 16) - 9; i <= (1 << 16) + 9; ++i) { + assert_false(roaring64_bitmap_contains_bulk(r, &context, i)); + } + assert_true( + roaring64_bitmap_contains_bulk(r, &context, (1 << 16) + 10)); + roaring64_bitmap_free(r); + } + { + // Range spans more than two containers. 
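+        // 300000 lies beyond 4 * 65536, so [100, 300000] touches five
+        // consecutive containers.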
+ roaring64_bitmap_t* r = roaring64_bitmap_create(); + roaring64_bitmap_add_range_closed(r, 100, 300000); + roaring64_bitmap_remove_range_closed(r, 101, 299999); + roaring64_bulk_context_t context{}; + assert_true(roaring64_bitmap_contains_bulk(r, &context, 100)); + for (uint64_t i = 101; i <= 299999; ++i) { + assert_false(roaring64_bitmap_contains_bulk(r, &context, i)); + } + assert_true(roaring64_bitmap_contains_bulk(r, &context, 300000)); + roaring64_bitmap_free(r); + } +} + +DEFINE_TEST(test_get_cardinality) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + + roaring64_bitmap_add(r, 0); + roaring64_bitmap_add(r, 100000); + roaring64_bitmap_add(r, 100001); + roaring64_bitmap_add(r, 100002); + roaring64_bitmap_add(r, 200000); + + assert_true(roaring64_bitmap_get_cardinality(r) == 5); + + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_range_cardinality) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + + roaring64_bitmap_add(r, 0); + roaring64_bitmap_add(r, 100000); + roaring64_bitmap_add(r, 100001); + roaring64_bitmap_add(r, 100002); + roaring64_bitmap_add(r, 200000); + + assert_true(roaring64_bitmap_range_cardinality(r, 0, 100000) == 1); + assert_true(roaring64_bitmap_range_cardinality(r, 1, 100001) == 1); + assert_true(roaring64_bitmap_range_cardinality(r, 0, 200001) == 5); + + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_is_empty) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + assert_true(roaring64_bitmap_is_empty(r)); + roaring64_bitmap_add(r, 1); + assert_false(roaring64_bitmap_is_empty(r)); + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_minimum) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + + assert_true(roaring64_bitmap_minimum(r) == UINT64_MAX); + + roaring64_bitmap_add(r, (1ULL << 34) + 1); + roaring64_bitmap_add(r, (1ULL << 35) + 1); + roaring64_bitmap_add(r, (1ULL << 35) + 2); + + assert_true(roaring64_bitmap_minimum(r) == ((1ULL << 34) + 1)); + + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_maximum) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + + assert_true(roaring64_bitmap_maximum(r) == 0); + + roaring64_bitmap_add(r, 0); + roaring64_bitmap_add(r, (1ULL << 34) + 1); + roaring64_bitmap_add(r, (1ULL << 35) + 1); + roaring64_bitmap_add(r, (1ULL << 35) + 2); + + assert_true(roaring64_bitmap_maximum(r) == ((1ULL << 35) + 2)); + + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_run_optimize) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + + roaring64_bitmap_add(r, 20000); + assert_false(roaring64_bitmap_run_optimize(r)); + + for (uint64_t i = 0; i < 30000; ++i) { + roaring64_bitmap_add(r, i); + } + assert_true(roaring64_bitmap_run_optimize(r)); + + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_size_in_bytes) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + + roaring64_bitmap_add(r, 0); + roaring64_bitmap_add(r, 1); + roaring64_bitmap_add(r, uint64_t(10101000100001)); + roaring64_bitmap_add(r, uint64_t(10101000100002)); + // sizeof(art_t) = 8 + // sizeof(art_node4_t) = 48 + // sizeof(roaring_64tree_val_t) * 2 = 16 * 2 + // sizeof(uint16) * 3 = 8 + // ----------------------------------------- + // total size = 96 + // + // Note: size of containers not included, this is serialized size. 
+ assert_true(roaring64_bitmap_size_in_bytes(r) == 96); + + roaring64_bitmap_free(r); +} + +DEFINE_TEST(test_equals) { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + roaring64_bitmap_t* r2 = roaring64_bitmap_create(); + + assert_true(roaring64_bitmap_equals(r1, r2)); + + roaring64_bitmap_add(r1, 100000); + roaring64_bitmap_add(r1, 100001); + roaring64_bitmap_add(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + roaring64_bitmap_add(r2, 100000); + roaring64_bitmap_add(r2, 100001); + roaring64_bitmap_add(r2, 200000); + roaring64_bitmap_add(r2, 400000); + + assert_false(roaring64_bitmap_equals(r1, r2)); + + roaring64_bitmap_add(r1, 400000); + roaring64_bitmap_remove(r1, 300000); + + assert_true(roaring64_bitmap_equals(r1, r2)); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); +} + +DEFINE_TEST(test_is_subset) { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + roaring64_bitmap_t* r2 = roaring64_bitmap_create(); + + assert_true(roaring64_bitmap_is_subset(r1, r2)); + + roaring64_bitmap_add(r1, 100000); + roaring64_bitmap_add(r1, 100001); + roaring64_bitmap_add(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + roaring64_bitmap_add(r2, 100000); + roaring64_bitmap_add(r2, 100001); + roaring64_bitmap_add(r2, 200000); + roaring64_bitmap_add(r2, 400000); + + assert_false(roaring64_bitmap_is_subset(r1, r2)); + assert_false(roaring64_bitmap_is_subset(r2, r1)); + + roaring64_bitmap_remove(r1, 300000); + + assert_true(roaring64_bitmap_is_subset(r1, r2)); + assert_false(roaring64_bitmap_is_subset(r2, r1)); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); +} + +DEFINE_TEST(test_is_strict_subset) { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + roaring64_bitmap_t* r2 = roaring64_bitmap_create(); + + assert_false(roaring64_bitmap_is_strict_subset(r1, r2)); + + roaring64_bitmap_add(r1, 100000); + roaring64_bitmap_add(r1, 100001); + roaring64_bitmap_add(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + roaring64_bitmap_add(r2, 100000); + roaring64_bitmap_add(r2, 100001); + roaring64_bitmap_add(r2, 200000); + roaring64_bitmap_add(r2, 400000); + + assert_false(roaring64_bitmap_is_strict_subset(r1, r2)); + assert_false(roaring64_bitmap_is_strict_subset(r2, r1)); + + roaring64_bitmap_remove(r1, 300000); + + assert_true(roaring64_bitmap_is_strict_subset(r1, r2)); + assert_false(roaring64_bitmap_is_strict_subset(r2, r1)); + + roaring64_bitmap_add(r1, 400000); + + assert_false(roaring64_bitmap_is_strict_subset(r1, r2)); + assert_false(roaring64_bitmap_is_strict_subset(r2, r1)); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); +} + +DEFINE_TEST(test_and) { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + roaring64_bitmap_t* r2 = roaring64_bitmap_create(); + + roaring64_bitmap_add(r1, 100000); + roaring64_bitmap_add(r1, 100001); + roaring64_bitmap_add(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + roaring64_bitmap_add(r2, 100001); + roaring64_bitmap_add(r2, 200000); + roaring64_bitmap_add(r2, 400000); + + roaring64_bitmap_t* r3 = roaring64_bitmap_and(r1, r2); + + assert_false(roaring64_bitmap_contains(r3, 100000)); + assert_true(roaring64_bitmap_contains(r3, 100001)); + assert_true(roaring64_bitmap_contains(r3, 200000)); + assert_false(roaring64_bitmap_contains(r3, 300000)); + assert_false(roaring64_bitmap_contains(r3, 400000)); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); + roaring64_bitmap_free(r3); +} + +DEFINE_TEST(test_and_cardinality) { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + roaring64_bitmap_t* r2 = 
roaring64_bitmap_create(); + + roaring64_bitmap_add(r1, 100000); + roaring64_bitmap_add(r1, 100001); + roaring64_bitmap_add(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + roaring64_bitmap_add(r2, 100001); + roaring64_bitmap_add(r2, 200000); + roaring64_bitmap_add(r2, 400000); + + assert_true(roaring64_bitmap_and_cardinality(r1, r2) == 2); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); +} + +DEFINE_TEST(test_and_inplace) { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + roaring64_bitmap_t* r2 = roaring64_bitmap_create(); + + roaring64_bitmap_add(r1, 50000); + roaring64_bitmap_add(r1, 100000); + roaring64_bitmap_add(r1, 100001); + roaring64_bitmap_add(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + roaring64_bitmap_add(r2, 100001); + roaring64_bitmap_add(r2, 200000); + roaring64_bitmap_add(r2, 400000); + + roaring64_bitmap_and_inplace(r1, r2); + + assert_false(roaring64_bitmap_contains(r1, 50000)); + assert_false(roaring64_bitmap_contains(r1, 100000)); + assert_true(roaring64_bitmap_contains(r1, 100001)); + assert_true(roaring64_bitmap_contains(r1, 200000)); + assert_false(roaring64_bitmap_contains(r1, 300000)); + assert_false(roaring64_bitmap_contains(r1, 400000)); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); +} + +DEFINE_TEST(test_intersect) { + { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + roaring64_bitmap_t* r2 = roaring64_bitmap_create(); + + roaring64_bitmap_add(r1, 50000); + roaring64_bitmap_add(r1, 100000); + roaring64_bitmap_add(r1, 100001); + roaring64_bitmap_add(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + roaring64_bitmap_add(r1, 40000); + roaring64_bitmap_add(r2, 100001); + roaring64_bitmap_add(r1, 400000); + + assert_true(roaring64_bitmap_intersect(r1, r2)); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); + } + { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + roaring64_bitmap_t* r2 = roaring64_bitmap_create(); + + roaring64_bitmap_add(r1, 50000); + roaring64_bitmap_add(r1, 100000); + roaring64_bitmap_add(r1, 100001); + roaring64_bitmap_add(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + roaring64_bitmap_add(r1, 40000); + roaring64_bitmap_add(r1, 400000); + + assert_false(roaring64_bitmap_intersect(r1, r2)); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); + } +} + +DEFINE_TEST(test_or) { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + roaring64_bitmap_t* r2 = roaring64_bitmap_create(); + + roaring64_bitmap_add(r1, 100000); + roaring64_bitmap_add(r1, 100001); + roaring64_bitmap_add(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + roaring64_bitmap_add(r2, 100001); + roaring64_bitmap_add(r2, 200000); + roaring64_bitmap_add(r2, 400000); + + roaring64_bitmap_t* r3 = roaring64_bitmap_or(r1, r2); + + assert_true(roaring64_bitmap_contains(r3, 100000)); + assert_true(roaring64_bitmap_contains(r3, 100001)); + assert_true(roaring64_bitmap_contains(r3, 200000)); + assert_true(roaring64_bitmap_contains(r3, 300000)); + assert_true(roaring64_bitmap_contains(r3, 400000)); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); + roaring64_bitmap_free(r3); +} + +DEFINE_TEST(test_or_cardinality) { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + roaring64_bitmap_t* r2 = roaring64_bitmap_create(); + + roaring64_bitmap_add(r1, 100000); + roaring64_bitmap_add(r1, 100001); + roaring64_bitmap_add(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + roaring64_bitmap_add(r2, 100001); + roaring64_bitmap_add(r2, 200000); + roaring64_bitmap_add(r2, 400000); + + 
assert_true(roaring64_bitmap_or_cardinality(r1, r2) == 5); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); +} + +DEFINE_TEST(test_or_inplace) { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + roaring64_bitmap_t* r2 = roaring64_bitmap_create(); + + roaring64_bitmap_add(r1, 100000); + roaring64_bitmap_add(r1, 100001); + roaring64_bitmap_add(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + roaring64_bitmap_add(r2, 100001); + roaring64_bitmap_add(r2, 200000); + roaring64_bitmap_add(r2, 400000); + + roaring64_bitmap_or_inplace(r1, r2); + + assert_true(roaring64_bitmap_contains(r1, 100000)); + assert_true(roaring64_bitmap_contains(r1, 100001)); + assert_true(roaring64_bitmap_contains(r1, 200000)); + assert_true(roaring64_bitmap_contains(r1, 300000)); + assert_true(roaring64_bitmap_contains(r1, 400000)); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); +} + +DEFINE_TEST(test_xor) { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + roaring64_bitmap_t* r2 = roaring64_bitmap_create(); + + roaring64_bitmap_add(r1, 100000); + roaring64_bitmap_add(r1, 100001); + roaring64_bitmap_add(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + roaring64_bitmap_add(r2, 100001); + roaring64_bitmap_add(r2, 200000); + roaring64_bitmap_add(r2, 400000); + + roaring64_bitmap_t* r3 = roaring64_bitmap_xor(r1, r2); + + assert_true(roaring64_bitmap_contains(r3, 100000)); + assert_false(roaring64_bitmap_contains(r3, 100001)); + assert_false(roaring64_bitmap_contains(r3, 200000)); + assert_true(roaring64_bitmap_contains(r3, 300000)); + assert_true(roaring64_bitmap_contains(r3, 400000)); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); + roaring64_bitmap_free(r3); +} + +DEFINE_TEST(test_xor_cardinality) { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + roaring64_bitmap_t* r2 = roaring64_bitmap_create(); + + roaring64_bitmap_add(r1, 100000); + roaring64_bitmap_add(r1, 100001); + roaring64_bitmap_add(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + roaring64_bitmap_add(r2, 100001); + roaring64_bitmap_add(r2, 200000); + roaring64_bitmap_add(r2, 400000); + + assert_true(roaring64_bitmap_xor_cardinality(r1, r2) == 3); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); +} + +DEFINE_TEST(test_xor_inplace) { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + roaring64_bitmap_t* r2 = roaring64_bitmap_create(); + + roaring64_bitmap_add(r1, 100000); + roaring64_bitmap_add(r1, 100001); + roaring64_bitmap_add(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + roaring64_bitmap_add(r2, 100001); + roaring64_bitmap_add(r2, 200000); + roaring64_bitmap_add(r2, 400000); + + roaring64_bitmap_xor_inplace(r1, r2); + + assert_true(roaring64_bitmap_contains(r1, 100000)); + assert_false(roaring64_bitmap_contains(r1, 100001)); + assert_false(roaring64_bitmap_contains(r1, 200000)); + assert_true(roaring64_bitmap_contains(r1, 300000)); + assert_true(roaring64_bitmap_contains(r1, 400000)); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); +} + +DEFINE_TEST(test_andnot) { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + roaring64_bitmap_t* r2 = roaring64_bitmap_create(); + + roaring64_bitmap_add(r1, 100000); + roaring64_bitmap_add(r1, 100001); + roaring64_bitmap_add(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + roaring64_bitmap_add(r2, 100001); + roaring64_bitmap_add(r2, 200000); + roaring64_bitmap_add(r2, 400000); + + roaring64_bitmap_t* r3 = roaring64_bitmap_andnot(r1, r2); + + assert_true(roaring64_bitmap_contains(r3, 100000)); + 
assert_false(roaring64_bitmap_contains(r3, 100001)); + assert_false(roaring64_bitmap_contains(r3, 200000)); + assert_true(roaring64_bitmap_contains(r3, 300000)); + assert_false(roaring64_bitmap_contains(r3, 400000)); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); + roaring64_bitmap_free(r3); +} + +DEFINE_TEST(test_andnot_cardinality) { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + roaring64_bitmap_t* r2 = roaring64_bitmap_create(); + + roaring64_bitmap_add(r1, 100000); + roaring64_bitmap_add(r1, 100001); + roaring64_bitmap_add(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + roaring64_bitmap_add(r2, 100001); + roaring64_bitmap_add(r2, 200000); + roaring64_bitmap_add(r2, 400000); + + assert_true(roaring64_bitmap_andnot_cardinality(r1, r2) == 2); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); +} + +DEFINE_TEST(test_andnot_inplace) { + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + roaring64_bitmap_t* r2 = roaring64_bitmap_create(); + + roaring64_bitmap_add(r1, 100000); + roaring64_bitmap_add(r1, 100001); + roaring64_bitmap_add(r1, 200000); + roaring64_bitmap_add(r1, 300000); + + roaring64_bitmap_add(r2, 100001); + roaring64_bitmap_add(r2, 200000); + roaring64_bitmap_add(r2, 400000); + + roaring64_bitmap_andnot_inplace(r1, r2); + + assert_true(roaring64_bitmap_contains(r1, 100000)); + assert_false(roaring64_bitmap_contains(r1, 100001)); + assert_false(roaring64_bitmap_contains(r1, 200000)); + assert_true(roaring64_bitmap_contains(r1, 300000)); + assert_false(roaring64_bitmap_contains(r1, 400000)); + + roaring64_bitmap_free(r1); + roaring64_bitmap_free(r2); +} + +bool roaring_iterator64_sumall(uint64_t value, void* param) { + *(uint64_t*)param += value; + return true; +} + +DEFINE_TEST(test_iterate) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + + roaring64_bitmap_add(r, 0); + roaring64_bitmap_add(r, 1ULL << 35); + roaring64_bitmap_add(r, (1Ull << 35) + 1); + roaring64_bitmap_add(r, (1Ull << 35) + 2); + roaring64_bitmap_add(r, (1Ull << 36)); + + uint64_t sum = 0; + assert_true(roaring64_bitmap_iterate(r, roaring_iterator64_sumall, &sum)); + assert_true(sum == ((1ULL << 35) + (1ULL << 35) + 1 + (1ULL << 35) + 2 + + (1ULL << 36))); + + roaring64_bitmap_free(r); +} + +} // namespace + +int main() { + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_copy), + cmocka_unit_test(test_from_range), + cmocka_unit_test(test_of_ptr), + cmocka_unit_test(test_of), + cmocka_unit_test(test_add), + cmocka_unit_test(test_add_checked), + cmocka_unit_test(test_add_bulk), + cmocka_unit_test(test_add_many), + cmocka_unit_test(test_add_range_closed), + cmocka_unit_test(test_contains_bulk), + cmocka_unit_test(test_select), + cmocka_unit_test(test_rank), + cmocka_unit_test(test_get_index), + cmocka_unit_test(test_remove), + cmocka_unit_test(test_remove_checked), + cmocka_unit_test(test_remove_bulk), + cmocka_unit_test(test_remove_many), + cmocka_unit_test(test_remove_range_closed), + cmocka_unit_test(test_get_cardinality), + cmocka_unit_test(test_range_cardinality), + cmocka_unit_test(test_is_empty), + cmocka_unit_test(test_minimum), + cmocka_unit_test(test_maximum), + cmocka_unit_test(test_run_optimize), + cmocka_unit_test(test_size_in_bytes), + cmocka_unit_test(test_equals), + cmocka_unit_test(test_is_subset), + cmocka_unit_test(test_is_strict_subset), + cmocka_unit_test(test_and), + cmocka_unit_test(test_and_cardinality), + cmocka_unit_test(test_and_inplace), + cmocka_unit_test(test_intersect), + cmocka_unit_test(test_or), + 
cmocka_unit_test(test_or_cardinality), + cmocka_unit_test(test_or_inplace), + cmocka_unit_test(test_xor), + cmocka_unit_test(test_xor_cardinality), + cmocka_unit_test(test_xor_inplace), + cmocka_unit_test(test_andnot), + cmocka_unit_test(test_andnot_cardinality), + cmocka_unit_test(test_andnot_inplace), + cmocka_unit_test(test_iterate), + }; + return cmocka_run_group_tests(tests, NULL, NULL); +} + From 95f3c3c7d90c6fd839f1052e6f37cbd664996834 Mon Sep 17 00:00:00 2001 From: Soerian Lieve Date: Sat, 30 Dec 2023 12:55:28 +0100 Subject: [PATCH 03/11] Add benchmarks for roaring64 and roaring64map --- microbenchmarks/bench.cpp | 158 +++++++++++++++++++++++++++++++++----- microbenchmarks/bench.h | 80 ++++++++++++++++--- 2 files changed, 210 insertions(+), 28 deletions(-) diff --git a/microbenchmarks/bench.cpp b/microbenchmarks/bench.cpp index fd2e45717..31f6657ff 100644 --- a/microbenchmarks/bench.cpp +++ b/microbenchmarks/bench.cpp @@ -1,6 +1,6 @@ #include "bench.h" -#include +#include struct successive_intersection { static uint64_t run() { @@ -17,19 +17,48 @@ struct successive_intersection { auto SuccessiveIntersection = BasicBench; BENCHMARK(SuccessiveIntersection); +struct successive_intersection64 { + static uint64_t run() { + uint64_t marker = 0; + for (size_t i = 0; i + 1 < count; ++i) { + roaring64_bitmap_t *tempand = + roaring64_bitmap_and(bitmaps64[i], bitmaps64[i + 1]); + marker += roaring64_bitmap_get_cardinality(tempand); + roaring64_bitmap_free(tempand); + } + return marker; + } +}; +auto SuccessiveIntersection64 = BasicBench; +BENCHMARK(SuccessiveIntersection64); struct successive_intersection_cardinality { static uint64_t run() { uint64_t marker = 0; for (size_t i = 0; i + 1 < count; ++i) { - marker += roaring_bitmap_and_cardinality(bitmaps[i], bitmaps[i + 1]); + marker += + roaring_bitmap_and_cardinality(bitmaps[i], bitmaps[i + 1]); } return marker; } }; -auto SuccessiveIntersectionCardinality = BasicBench; +auto SuccessiveIntersectionCardinality = + BasicBench; BENCHMARK(SuccessiveIntersectionCardinality); +struct successive_intersection_cardinality64 { + static uint64_t run() { + uint64_t marker = 0; + for (size_t i = 0; i + 1 < count; ++i) { + marker += roaring64_bitmap_and_cardinality(bitmaps64[i], + bitmaps64[i + 1]); + } + return marker; + } +}; +auto SuccessiveIntersectionCardinality64 = + BasicBench; +BENCHMARK(SuccessiveIntersectionCardinality64); struct successive_union_cardinality { static uint64_t run() { @@ -43,18 +72,47 @@ struct successive_union_cardinality { auto SuccessiveUnionCardinality = BasicBench; BENCHMARK(SuccessiveUnionCardinality); +struct successive_union_cardinality64 { + static uint64_t run() { + uint64_t marker = 0; + for (size_t i = 0; i + 1 < count; ++i) { + marker += + roaring64_bitmap_or_cardinality(bitmaps64[i], bitmaps64[i + 1]); + } + return marker; + } +}; +auto SuccessiveUnionCardinality64 = BasicBench; +BENCHMARK(SuccessiveUnionCardinality64); + struct successive_difference_cardinality { static uint64_t run() { uint64_t marker = 0; for (size_t i = 0; i + 1 < count; ++i) { - marker += roaring_bitmap_andnot_cardinality(bitmaps[i], bitmaps[i + 1]); + marker += + roaring_bitmap_andnot_cardinality(bitmaps[i], bitmaps[i + 1]); } return marker; } }; -auto SuccessiveDifferenceCardinality = BasicBench; +auto SuccessiveDifferenceCardinality = + BasicBench; BENCHMARK(SuccessiveDifferenceCardinality); +struct successive_difference_cardinality64 { + static uint64_t run() { + uint64_t marker = 0; + for (size_t i = 0; i + 1 < count; ++i) { + marker += 
roaring64_bitmap_andnot_cardinality(bitmaps64[i], + bitmaps64[i + 1]); + } + return marker; + } +}; +auto SuccessiveDifferenceCardinality64 = + BasicBench; +BENCHMARK(SuccessiveDifferenceCardinality64); + struct successive_union { static uint64_t run() { uint64_t marker = 0; @@ -70,6 +128,21 @@ struct successive_union { auto SuccessiveUnion = BasicBench; BENCHMARK(SuccessiveUnion); +struct successive_union64 { + static uint64_t run() { + uint64_t marker = 0; + for (size_t i = 0; i + 1 < count; ++i) { + roaring64_bitmap_t *tempunion = + roaring64_bitmap_or(bitmaps64[i], bitmaps64[i + 1]); + marker += roaring64_bitmap_get_cardinality(tempunion); + roaring64_bitmap_free(tempunion); + } + return marker; + } +}; +auto SuccessiveUnion64 = BasicBench; +BENCHMARK(SuccessiveUnion64); + struct many_union { static uint64_t run() { uint64_t marker = 0; @@ -110,6 +183,34 @@ struct random_access { auto RandomAccess = BasicBench; BENCHMARK(RandomAccess); +struct random_access64 { + static uint64_t run() { + uint64_t marker = 0; + for (size_t i = 0; i < count; ++i) { + marker += roaring64_bitmap_contains(bitmaps64[i], maxvalue / 4); + marker += roaring64_bitmap_contains(bitmaps64[i], maxvalue / 2); + marker += roaring64_bitmap_contains(bitmaps64[i], 3 * maxvalue / 4); + } + return marker; + } +}; +auto RandomAccess64 = BasicBench; +BENCHMARK(RandomAccess64); + +struct random_access64_cpp { + static uint64_t run() { + uint64_t marker = 0; + for (size_t i = 0; i < count; ++i) { + marker += bitmaps64cpp[i]->contains(maxvalue / 4); + marker += bitmaps64cpp[i]->contains(maxvalue / 2); + marker += bitmaps64cpp[i]->contains(3 * maxvalue / 4); + } + return marker; + } +}; +auto RandomAccess64Cpp = BasicBench; +BENCHMARK(RandomAccess64Cpp); + struct to_array { static uint64_t run() { uint64_t marker = 0; @@ -141,7 +242,6 @@ struct iterate_all { auto IterateAll = BasicBench; BENCHMARK(IterateAll); - struct compute_cardinality { static uint64_t run() { uint64_t marker = 0; @@ -155,15 +255,28 @@ struct compute_cardinality { auto ComputeCardinality = BasicBench; BENCHMARK(ComputeCardinality); +struct compute_cardinality64 { + static uint64_t run() { + uint64_t marker = 0; + for (size_t i = 0; i < count; ++i) { + marker += roaring64_bitmap_get_cardinality(bitmaps64[i]); + } + return marker; + } +}; + +auto ComputeCardinality64 = BasicBench; +BENCHMARK(ComputeCardinality64); + struct rank_many_slow { static uint64_t run() { std::vector ranks(5); for (size_t i = 0; i < count; ++i) { - ranks[0] = roaring_bitmap_rank(bitmaps[i], maxvalue/5); - ranks[1] = roaring_bitmap_rank(bitmaps[i], 2*maxvalue/5); - ranks[2] = roaring_bitmap_rank(bitmaps[i], 3*maxvalue/5); - ranks[3] = roaring_bitmap_rank(bitmaps[i], 4*maxvalue/5); - ranks[4] = roaring_bitmap_rank(bitmaps[i], maxvalue); + ranks[0] = roaring_bitmap_rank(bitmaps[i], maxvalue / 5); + ranks[1] = roaring_bitmap_rank(bitmaps[i], 2 * maxvalue / 5); + ranks[2] = roaring_bitmap_rank(bitmaps[i], 3 * maxvalue / 5); + ranks[3] = roaring_bitmap_rank(bitmaps[i], 4 * maxvalue / 5); + ranks[4] = roaring_bitmap_rank(bitmaps[i], maxvalue); } return ranks[0]; } @@ -174,10 +287,13 @@ BENCHMARK(RankManySlow); struct rank_many { static uint64_t run() { std::vector ranks(5); - std::vector input{maxvalue/5, 2*maxvalue/5, 3*maxvalue/5, 4*maxvalue/5, maxvalue}; + std::vector input{maxvalue / 5, 2 * maxvalue / 5, + 3 * maxvalue / 5, 4 * maxvalue / 5, + maxvalue}; for (size_t i = 0; i < count; ++i) { - roaring_bitmap_rank_many(bitmaps[i],input.data(), input.data()+input.size(), ranks.data()); - } 
+ roaring_bitmap_rank_many(bitmaps[i], input.data(), + input.data() + input.size(), ranks.data()); + } return ranks[0]; } }; @@ -212,10 +328,14 @@ int main(int argc, char **argv) { int support = roaring::internal::croaring_hardware_support(); #if CROARING_COMPILER_SUPPORTS_AVX512 benchmark::AddCustomContext("AVX-512", "supported by compiler"); - benchmark::AddCustomContext("AVX-512 hardware", ( support & roaring::internal::ROARING_SUPPORTS_AVX512 ) ? "yes" : "no"); -#endif // CROARING_COMPILER_SUPPORTS_AVX512 - benchmark::AddCustomContext("AVX-2 hardware", ( support & roaring::internal::ROARING_SUPPORTS_AVX2 ) ? "yes" : "no"); -#endif // CROARING_IS_X64 + benchmark::AddCustomContext( + "AVX-512 hardware", + (support & roaring::internal::ROARING_SUPPORTS_AVX512) ? "yes" : "no"); +#endif // CROARING_COMPILER_SUPPORTS_AVX512 + benchmark::AddCustomContext( + "AVX-2 hardware", + (support & roaring::internal::ROARING_SUPPORTS_AVX2) ? "yes" : "no"); +#endif // CROARING_IS_X64 benchmark::AddCustomContext("data source", dir_name); benchmark::AddCustomContext("number of bitmaps", std::to_string(count)); @@ -233,4 +353,4 @@ int main(int argc, char **argv) { roaring_bitmap_free(bitmaps[i]); } free(array_buffer); -} \ No newline at end of file +} diff --git a/microbenchmarks/bench.h b/microbenchmarks/bench.h index 7a8d0662c..20904abac 100644 --- a/microbenchmarks/bench.h +++ b/microbenchmarks/bench.h @@ -13,10 +13,10 @@ #include "toni_ronnko_dirent.h" #endif - - #include #include +#include +#include #include "performancecounters/event_counter.h" // clang-format on @@ -24,15 +24,19 @@ #if CROARING_IS_X64 #ifndef CROARING_COMPILER_SUPPORTS_AVX512 #error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined." -#endif // CROARING_COMPILER_SUPPORTS_AVX512 +#endif // CROARING_COMPILER_SUPPORTS_AVX512 #endif +using roaring::Roaring64Map; + event_collector collector; size_t N = 1000; size_t bitmap_examples_bytes = 0; size_t count = 0; roaring_bitmap_t **bitmaps = NULL; -uint32_t * array_buffer; +roaring64_bitmap_t **bitmaps64 = NULL; +Roaring64Map **bitmaps64cpp = NULL; +uint32_t *array_buffer; uint32_t maxvalue = 0; uint32_t maxcard = 0; @@ -174,7 +178,9 @@ static roaring_bitmap_t **create_all_bitmaps(size_t *howmany, maxvalue = numbers[i][howmany[i] - 1]; } } - if(maxcard < howmany[i]) { maxcard = howmany[i]; } + if (maxcard < howmany[i]) { + maxcard = howmany[i]; + } } if (numbers == NULL) return NULL; roaring_bitmap_t **answer = @@ -187,7 +193,61 @@ static roaring_bitmap_t **create_all_bitmaps(size_t *howmany, bitmap_examples_bytes += roaring_bitmap_size_in_bytes(answer[i]); roaring_bitmap_set_copy_on_write(answer[i], copy_on_write); } - array_buffer = (uint32_t*) malloc(maxcard * sizeof(uint32_t)); + array_buffer = (uint32_t *)malloc(maxcard * sizeof(uint32_t)); + return answer; +} + +static roaring64_bitmap_t **create_all_64bitmaps(size_t *howmany, + uint32_t **numbers, + size_t tcount, + bool runoptimize) { + for (size_t i = 0; i < count; i++) { + if (howmany[i] > 0) { + if (maxvalue < numbers[i][howmany[i] - 1]) { + maxvalue = numbers[i][howmany[i] - 1]; + } + } + if (maxcard < howmany[i]) { + maxcard = howmany[i]; + } + } + if (numbers == NULL) return NULL; + roaring64_bitmap_t **answer = + (roaring64_bitmap_t **)malloc(sizeof(roaring64_bitmap_t *) * tcount); + for (size_t i = 0; i < tcount; i++) { + answer[i] = roaring64_bitmap_create(); + for (size_t j = 0; j < howmany[i]; ++j) { + roaring64_bitmap_add(answer[i], numbers[i][j]); + } + if (runoptimize) roaring64_bitmap_run_optimize(answer[i]); + } + 
return answer; +} + +static Roaring64Map **create_all_64bitmaps_cpp(size_t *howmany, + uint32_t **numbers, + size_t tcount, + bool runoptimize) { + for (size_t i = 0; i < count; i++) { + if (howmany[i] > 0) { + if (maxvalue < numbers[i][howmany[i] - 1]) { + maxvalue = numbers[i][howmany[i] - 1]; + } + } + if (maxcard < howmany[i]) { + maxcard = howmany[i]; + } + } + if (numbers == NULL) return NULL; + Roaring64Map **answer = + (Roaring64Map **)malloc(sizeof(Roaring64Map *) * tcount); + for (size_t i = 0; i < tcount; i++) { + answer[i] = new Roaring64Map(); + for (size_t j = 0; j < howmany[i]; ++j) { + answer[i]->add(numbers[i][j]); + } + if (runoptimize) answer[i]->runOptimize(); + } return answer; } @@ -210,14 +270,13 @@ static void BasicBench(benchmark::State &state) { } state.counters["cycles"] = aggregate.best.cycles(); - state.counters["instructions"] = aggregate.best.instructions(); + state.counters["instructions"] = aggregate.best.instructions(); state.counters["GHz"] = aggregate.best.cycles() / aggregate.best.elapsed_ns(); } (void)marker; } - int load(const char *dirname) { const char *extension = ".txt"; bool copy_on_write = false; @@ -235,6 +294,9 @@ int load(const char *dirname) { } bitmaps = create_all_bitmaps(howmany, numbers, count, runoptimize, copy_on_write); + bitmaps64 = create_all_64bitmaps(howmany, numbers, count, runoptimize); + bitmaps64cpp = + create_all_64bitmaps_cpp(howmany, numbers, count, runoptimize); for (size_t i = 0; i < count; ++i) { free(numbers[i]); @@ -244,4 +306,4 @@ int load(const char *dirname) { return count; } -#endif \ No newline at end of file +#endif From bce552950715589b454442cc112fc39a8e8bf64b Mon Sep 17 00:00:00 2001 From: Soerian Lieve Date: Mon, 1 Jan 2024 17:59:58 +0100 Subject: [PATCH 04/11] Support big endian and non-linux in roaring64.c --- src/roaring64.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/roaring64.c b/src/roaring64.c index 883ded9f9..4937784eb 100644 --- a/src/roaring64.c +++ b/src/roaring64.c @@ -1,5 +1,4 @@ #include -#include // TODO: this is only defined on GNU/Linux #include #include #include @@ -23,23 +22,41 @@ namespace api64 { // TODO: Serialization. // TODO: Error on failed allocation. +// Returns the uint64 represented in big endian format, so that the underlying +// bytes can be used in keys in the ART. +static inline uint64_t htobe(uint64_t x) { +#if CROARING_IS_BIG_ENDIAN + return x; +#else + // Gets compiled to bswap or equivalent on most compilers. + return ((x & 0x00000000000000FFULL) << 56) | + ((x & 0x000000000000FF00ULL) << 40) | + ((x & 0x0000000000FF0000ULL) << 24) | + ((x & 0x00000000FF000000ULL) << 8) | + ((x & 0x000000FF00000000ULL) >> 8) | + ((x & 0x0000FF0000000000ULL) >> 24) | + ((x & 0x00FF000000000000ULL) >> 40) | + ((x & 0xFF00000000000000ULL) >> 56); +#endif // CROARING_IS_BIG_ENDIAN +} + +// Returns the uint64 represented in host format. +static inline uint64_t betoh(uint64_t x) { return htobe(x); } + // Splits the given uint64 key into high 48 bit and low 16 bit components. -// Expects high48_out to be of length 6. +// Expects high48_out to be of length ART_KEY_BYTES. static inline uint16_t split_key(uint64_t key, uint8_t high48_out[]) { - // Reverse byte order of the high 6 bytes. Not portable to big-endian - // systems! - uint64_t tmp = __bswap_64(key); + uint64_t tmp = htobe(key); memcpy(high48_out, (uint8_t *)(&tmp), ART_KEY_BYTES); return (uint16_t)key; } // Recombines the high 48 bit and low 16 bit components into a uint64 key. 
-// Expects high48_out to be of length 6. +// Expects high48_out to be of length ART_KEY_BYTES. static inline uint64_t combine_key(const uint8_t high48[], uint16_t low16) { uint64_t result = 0; memcpy((uint8_t *)(&result), high48, ART_KEY_BYTES); - // Not portable to big-endian systems! - return __bswap_64(result) | low16; + return betoh(result) | low16; } static inline uint64_t minimum(uint64_t a, uint64_t b) { From b029ca7c23b9b4ad9fbf54eb5ca32f6d9e1ced53 Mon Sep 17 00:00:00 2001 From: Soerian Lieve Date: Mon, 1 Jan 2024 18:11:02 +0100 Subject: [PATCH 05/11] Use void as the base node type in the ART Also zero-instantiate structs when returning. --- include/roaring/art/art.h | 2 +- src/art/art.c | 13 ++++--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/include/roaring/art/art.h b/include/roaring/art/art.h index 94130c61f..28b91ac1a 100644 --- a/include/roaring/art/art.h +++ b/include/roaring/art/art.h @@ -33,7 +33,7 @@ namespace internal { #endif typedef uint8_t art_key_chunk_t; -typedef struct art_node_s art_node_t; +typedef void art_node_t; /** * Wrapper to allow an empty tree. diff --git a/src/art/art.c b/src/art/art.c index 5379823a8..b349d0494 100644 --- a/src/art/art.c +++ b/src/art/art.c @@ -38,10 +38,6 @@ namespace internal { typedef uint8_t art_typecode_t; -// Base node struct. -typedef struct art_node_s { -} art_node_t; - // Should be kept in sync with art_val_t. typedef struct art_leaf_s { art_key_chunk_t key[ART_KEY_BYTES]; @@ -51,7 +47,6 @@ typedef struct art_leaf_s { // // We use a fixed-length array as a pointer would be larger than the array. typedef struct art_inner_node_s { - art_node_t base; art_typecode_t typecode; uint8_t prefix_size; uint8_t prefix[ART_KEY_BYTES - 1]; @@ -855,7 +850,7 @@ static art_indexed_child_t art_node_next_child(const art_node_t *node, return art_node256_next_child((art_node256_t *)node, index); default: assert(false); - return (art_indexed_child_t){}; + return (art_indexed_child_t){0}; } } @@ -879,7 +874,7 @@ static art_indexed_child_t art_node_prev_child(const art_node_t *node, return art_node256_prev_child((art_node256_t *)node, index); default: assert(false); - return (art_indexed_child_t){}; + return (art_indexed_child_t){0}; } } @@ -903,7 +898,7 @@ static art_indexed_child_t art_node_child_at(const art_node_t *node, return art_node256_child_at((art_node256_t *)node, index); default: assert(false); - return (art_indexed_child_t){}; + return (art_indexed_child_t){0}; } } @@ -927,7 +922,7 @@ static art_indexed_child_t art_node_lower_bound(const art_node_t *node, return art_node256_lower_bound((art_node256_t *)node, key_chunk); default: assert(false); - return (art_indexed_child_t){}; + return (art_indexed_child_t){0}; } } From 1e607f66eee6f5ce263b26b3c60b054e3c1bc22c Mon Sep 17 00:00:00 2001 From: Soerian Lieve Date: Mon, 1 Jan 2024 18:26:22 +0100 Subject: [PATCH 06/11] Remove tests for size methods in ART and roaring64 --- tests/art_unit.cpp | 19 ------------------- tests/roaring64_unit.cpp | 21 --------------------- 2 files changed, 40 deletions(-) diff --git a/tests/art_unit.cpp b/tests/art_unit.cpp index 05144ac4d..06c71a08c 100644 --- a/tests/art_unit.cpp +++ b/tests/art_unit.cpp @@ -460,24 +460,6 @@ DEFINE_TEST(test_art_shadowed) { art.assertLowerBoundValid(1); } -DEFINE_TEST(test_art_size_in_bytes) { - std::vector keys = { - "000001", "000002", "000003", "000004", "001005", - }; - std::vector values = {{1}, {2}, {3}, {4}, {5}}; - - art_t art{NULL}; - for (size_t i = 0; i < keys.size(); ++i) { - art_insert(&art, 
(art_key_chunk_t*)keys[i], &values[i]); - } - // sizeof(art_t) = 8 - // sizeof(art_node4_t) * 2 = 48 * 2 - // -------------------------------- - // total size = 104 - assert_true(art_size_in_bytes(&art) == 104); - art_free(&art); -} - } // namespace int main() { @@ -495,7 +477,6 @@ int main() { cmocka_unit_test(test_art_iterator_erase), cmocka_unit_test(test_art_iterator_insert), cmocka_unit_test(test_art_shadowed), - cmocka_unit_test(test_art_size_in_bytes), }; return cmocka_run_group_tests(tests, NULL, NULL); } diff --git a/tests/roaring64_unit.cpp b/tests/roaring64_unit.cpp index 868b2df33..ce4bdac20 100644 --- a/tests/roaring64_unit.cpp +++ b/tests/roaring64_unit.cpp @@ -442,26 +442,6 @@ DEFINE_TEST(test_run_optimize) { roaring64_bitmap_free(r); } -DEFINE_TEST(test_size_in_bytes) { - roaring64_bitmap_t* r = roaring64_bitmap_create(); - - roaring64_bitmap_add(r, 0); - roaring64_bitmap_add(r, 1); - roaring64_bitmap_add(r, uint64_t(10101000100001)); - roaring64_bitmap_add(r, uint64_t(10101000100002)); - // sizeof(art_t) = 8 - // sizeof(art_node4_t) = 48 - // sizeof(roaring_64tree_val_t) * 2 = 16 * 2 - // sizeof(uint16) * 3 = 8 - // ----------------------------------------- - // total size = 96 - // - // Note: size of containers not included, this is serialized size. - assert_true(roaring64_bitmap_size_in_bytes(r) == 96); - - roaring64_bitmap_free(r); -} - DEFINE_TEST(test_equals) { roaring64_bitmap_t* r1 = roaring64_bitmap_create(); roaring64_bitmap_t* r2 = roaring64_bitmap_create(); @@ -922,7 +902,6 @@ int main() { cmocka_unit_test(test_minimum), cmocka_unit_test(test_maximum), cmocka_unit_test(test_run_optimize), - cmocka_unit_test(test_size_in_bytes), cmocka_unit_test(test_equals), cmocka_unit_test(test_is_subset), cmocka_unit_test(test_is_strict_subset), From a49eb503043b96263024ec31c9bc206dd5099fa2 Mon Sep 17 00:00:00 2001 From: Soerian Lieve Date: Mon, 1 Jan 2024 18:43:56 +0100 Subject: [PATCH 07/11] Fix failing Win32 test workflow These tests were taking too long to complete. --- src/roaring64.c | 56 ++++++++++++++++++------------- tests/art_unit.cpp | 71 ---------------------------------------- tests/roaring64_unit.cpp | 2 +- 3 files changed, 34 insertions(+), 95 deletions(-) diff --git a/src/roaring64.c b/src/roaring64.c index 4937784eb..ffbbcb9bd 100644 --- a/src/roaring64.c +++ b/src/roaring64.c @@ -5,6 +5,37 @@ #include #include +#if CROARING_IS_BIG_ENDIAN +#define htobe64(x) (x) + +#elif defined(_WIN32) || defined(_WIN64) // CROARING_IS_BIG_ENDIAN +#include +#define htobe64(x) _byteswap_uint64(x) + +#elif defined(__APPLE__) // CROARING_IS_BIG_ENDIAN +#include +#define htobe64(x) OSSwapInt64(x) + +#elif defined(__has_include) && \ + __has_include() // CROARING_IS_BIG_ENDIAN +#include +#define htobe64(x) __bswap_64(x) + +#else // CROARING_IS_BIG_ENDIAN +// Gets compiled to bswap or equivalent on most compilers. +#define htobe64(x) \ + (((x & 0x00000000000000FFULL) << 56) | \ + ((x & 0x000000000000FF00ULL) << 40) | \ + ((x & 0x0000000000FF0000ULL) << 24) | \ + ((x & 0x00000000FF000000ULL) << 8) | \ + ((x & 0x000000FF00000000ULL) >> 8) | \ + ((x & 0x0000FF0000000000ULL) >> 24) | \ + ((x & 0x00FF000000000000ULL) >> 40) | \ + ((x & 0xFF00000000000000ULL) >> 56)) +#endif // CROARING_IS_BIG_ENDIAN + +#define betoh64(x) htobe64(x) + #ifdef __cplusplus using namespace ::roaring::internal; @@ -22,31 +53,10 @@ namespace api64 { // TODO: Serialization. // TODO: Error on failed allocation. 
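The split_key/combine_key helpers below lean on the fact that the ART orders keys by comparing their bytes: writing the high 48 bits of a value most-significant-byte-first (the job of htobe64 above) makes that byte order coincide with the numeric order of the original 64-bit keys, so iteration and lower_bound come back sorted. A minimal self-contained sketch of the same encoding, using plain shifts instead of the htobe64/betoh64 macros; the demo_split/demo_combine names are illustrative only and are not part of the library:

#include <stdint.h>
#include <stdio.h>

/* Illustrative sketch: mirrors what split_key/combine_key do with memcpy +
 * htobe64, but with explicit shifts so it is endian-agnostic by construction. */
static void demo_split(uint64_t key, uint8_t high48[6], uint16_t *low16) {
    for (int i = 0; i < 6; i++) {
        /* Most significant byte first, so byte-wise comparison of high48
         * arrays matches numeric comparison of the original keys. */
        high48[i] = (uint8_t)(key >> (56 - 8 * i));
    }
    *low16 = (uint16_t)key;
}

static uint64_t demo_combine(const uint8_t high48[6], uint16_t low16) {
    uint64_t result = 0;
    for (int i = 0; i < 6; i++) {
        result |= (uint64_t)high48[i] << (56 - 8 * i);
    }
    return result | low16;
}

int main(void) {
    uint8_t high48[6];
    uint16_t low16;
    demo_split(0x123456789ABCULL, high48, &low16);
    printf("high48=%02x%02x%02x%02x%02x%02x low16=%04x\n", high48[0],
           high48[1], high48[2], high48[3], high48[4], high48[5], low16);
    /* Round-trips back to the original value. */
    printf("combined=0x%llx\n", (unsigned long long)demo_combine(high48, low16));
    return 0;
}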
-// Returns the uint64 represented in big endian format, so that the underlying -// bytes can be used in keys in the ART. -static inline uint64_t htobe(uint64_t x) { -#if CROARING_IS_BIG_ENDIAN - return x; -#else - // Gets compiled to bswap or equivalent on most compilers. - return ((x & 0x00000000000000FFULL) << 56) | - ((x & 0x000000000000FF00ULL) << 40) | - ((x & 0x0000000000FF0000ULL) << 24) | - ((x & 0x00000000FF000000ULL) << 8) | - ((x & 0x000000FF00000000ULL) >> 8) | - ((x & 0x0000FF0000000000ULL) >> 24) | - ((x & 0x00FF000000000000ULL) >> 40) | - ((x & 0xFF00000000000000ULL) >> 56); -#endif // CROARING_IS_BIG_ENDIAN -} - -// Returns the uint64 represented in host format. -static inline uint64_t betoh(uint64_t x) { return htobe(x); } - // Splits the given uint64 key into high 48 bit and low 16 bit components. // Expects high48_out to be of length ART_KEY_BYTES. static inline uint16_t split_key(uint64_t key, uint8_t high48_out[]) { - uint64_t tmp = htobe(key); + uint64_t tmp = htobe64(key); memcpy(high48_out, (uint8_t *)(&tmp), ART_KEY_BYTES); return (uint16_t)key; } @@ -56,7 +66,7 @@ static inline uint16_t split_key(uint64_t key, uint8_t high48_out[]) { static inline uint64_t combine_key(const uint8_t high48[], uint16_t low16) { uint64_t result = 0; memcpy((uint8_t *)(&result), high48, ART_KEY_BYTES); - return betoh(result) | low16; + return betoh64(result) | low16; } static inline uint64_t minimum(uint64_t a, uint64_t b) { diff --git a/tests/art_unit.cpp b/tests/art_unit.cpp index 06c71a08c..9abbedde3 100644 --- a/tests/art_unit.cpp +++ b/tests/art_unit.cpp @@ -185,75 +185,6 @@ DEFINE_TEST(test_art_erase_all) { art_free(&art); } -DEFINE_TEST(test_art_many_entries) { - const size_t kValues = 10000; - std::vector> keys; - std::vector values; - keys.reserve(kValues); - values.reserve(kValues); - for (uint64_t i = 0; i < kValues; ++i) { - char key[7]; - snprintf(key, 7, "%.6lu", i); - for (size_t j = 0; j < 6; ++j) { - keys[i][j] = key[j]; - } - values[i] = {i}; - } - - art_t art{NULL}; - - for (size_t i = 0; i < kValues; ++i) { - art_insert(&art, (uint8_t*)keys[i].data(), &values[i]); - } - - for (size_t i = 0; i < kValues; ++i) { - Value* found = (Value*)art_find(&art, (uint8_t*)keys[i].data()); - assert_true(found != NULL && *found == values[i]); - } - - for (size_t i = 0; i < kValues; ++i) { - Value* erased = (Value*)art_erase(&art, (uint8_t*)keys[i].data()); - assert_true(erased != NULL && *erased == values[i]); - } - - art_free(&art); -} - -DEFINE_TEST(test_art_dense_entries) { - const size_t kValues = 300; - std::vector keys; - std::vector values; - keys.reserve(kValues); - values.reserve(kValues); - for (uint64_t i = 0; i < kValues; ++i) { - uint64_t key = i; - uint8_t* key_begin = (uint8_t*)&key; - uint8_t* key_end = key_begin + sizeof(key); - std::reverse(key_begin, key_end); - key = key >> 16; - keys[i] = key; - values[i] = {i}; - } - - art_t art{NULL}; - - for (size_t i = 0; i < kValues; ++i) { - art_insert(&art, (uint8_t*)&keys[i], &values[i]); - } - - for (size_t i = 0; i < kValues; ++i) { - Value* found = (Value*)art_find(&art, (uint8_t*)&keys[i]); - assert_true(found != NULL && *found == values[i]); - } - - for (size_t i = 0; i < kValues; ++i) { - Value* erased = (Value*)art_erase(&art, (uint8_t*)&keys[i]); - assert_true(erased != NULL && *erased == values[i]); - } - - art_free(&art); -} - DEFINE_TEST(test_art_is_empty) { std::vector keys = { "000001", "000002", "000003", "000004", "001005", @@ -466,8 +397,6 @@ int main() { const struct CMUnitTest tests[] = { 
cmocka_unit_test(test_art_simple), cmocka_unit_test(test_art_erase_all), - cmocka_unit_test(test_art_many_entries), - cmocka_unit_test(test_art_dense_entries), cmocka_unit_test(test_art_is_empty), cmocka_unit_test(test_art_iterator_next), cmocka_unit_test(test_art_iterator_prev), diff --git a/tests/roaring64_unit.cpp b/tests/roaring64_unit.cpp index ce4bdac20..d48b2127b 100644 --- a/tests/roaring64_unit.cpp +++ b/tests/roaring64_unit.cpp @@ -93,7 +93,7 @@ DEFINE_TEST(test_of_ptr) { } DEFINE_TEST(test_of) { - roaring64_bitmap_t* r = roaring64_bitmap_of(3, 1, 20000, 500000); + roaring64_bitmap_t* r = roaring64_bitmap_of(3, 1ULL, 20000ULL, 500000ULL); assert_true(roaring64_bitmap_contains(r, 1)); assert_true(roaring64_bitmap_contains(r, 20000)); assert_true(roaring64_bitmap_contains(r, 500000)); From e80d89dff8ba4c24e034c29e570e3435c4c9b463 Mon Sep 17 00:00:00 2001 From: Soerian Lieve Date: Tue, 2 Jan 2024 23:43:11 +0100 Subject: [PATCH 08/11] Hide ART and roaring64-internal types These don't actually need to be exposed to the user, so we can declare them in the header and keep them private. ART types don't need to be declared at all. --- include/roaring/roaring64.h | 26 +++----------------------- src/roaring64.c | 20 ++++++++++++++++++-- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/include/roaring/roaring64.h b/include/roaring/roaring64.h index 3685fcbb9..cfa069b50 100644 --- a/include/roaring/roaring64.h +++ b/include/roaring/roaring64.h @@ -1,40 +1,20 @@ #ifndef ROARING64_H #define ROARING64_H -#include #include #include #include #include #include -// TODO: This is messy and can likely be improved. -#if defined(__cplusplus) -#define ROARING_ART_T ::roaring::internal::art_t -#define ROARING_ART_VAL_T ::roaring::internal::art_val_t -#else -#define ROARING_ART_T art_t -#define ROARING_ART_VAL_T art_val_t -#define ROARING_CONTAINER_T void -#endif - #ifdef __cplusplus extern "C" { namespace roaring { namespace api { #endif -typedef struct roaring64_bitmap_s { - ROARING_ART_T art; - uint8_t flags; -} roaring64_bitmap_t; - -// TODO: Ideally we don't put this in the header. -typedef struct leaf_s { - ROARING_ART_VAL_T _pad; - uint8_t typecode; - ROARING_CONTAINER_T *container; -} leaf_t; +typedef struct roaring64_bitmap_s roaring64_bitmap_t; +typedef struct roaring64_leaf_s roaring64_leaf_t; /** * A bit of context usable with `roaring64_bitmap_*_bulk()` functions. @@ -50,7 +30,7 @@ typedef struct leaf_s { typedef struct roaring64_bulk_context_s { uint8_t high_bytes[ART_KEY_BYTES]; uint16_t low_bytes; - leaf_t *leaf; + roaring64_leaf_t *leaf; } roaring64_bulk_context_t; /** diff --git a/src/roaring64.c b/src/roaring64.c index ffbbcb9bd..bf02af4a2 100644 --- a/src/roaring64.c +++ b/src/roaring64.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -41,10 +42,9 @@ using namespace ::roaring::internal; extern "C" { namespace roaring { -namespace api64 { +namespace api { #endif -// TODO: Figure out how to keep art_t from being exposed in roaring64.h // TODO: Iteration. // * Need to create a container iterator which can be used across 32 and 64 bit // bitmaps. @@ -53,6 +53,22 @@ namespace api64 { // TODO: Serialization. // TODO: Error on failed allocation. +typedef struct roaring64_bitmap_s { + art_t art; + uint8_t flags; +} roaring64_bitmap_t; + +// Leaf type of the ART used to keep the high 48 bits of each entry. 
+typedef struct roaring64_leaf_s { + art_val_t _pad; + uint8_t typecode; + container_t *container; +} roaring64_leaf_t; + +// Alias to make it easier to work with, since it's an internal-only type +// anyway. +typedef struct roaring64_leaf_s leaf_t; + // Splits the given uint64 key into high 48 bit and low 16 bit components. // Expects high48_out to be of length ART_KEY_BYTES. static inline uint16_t split_key(uint64_t key, uint8_t high48_out[]) { From ca48de0791329c9dabb513ec259bbc496d5a8bca Mon Sep 17 00:00:00 2001 From: Soerian Lieve Date: Wed, 3 Jan 2024 20:20:27 +0100 Subject: [PATCH 09/11] Cleanups to ART * Change art_compare_prefix to take a single length rather than two. * Use art_compare_keys where possible. * Use art_val_t directly (through an alias) rather than defining a separate type. * Remove key_chunk from art_indexed_child_t. * Use node->base where possible. * Merge art_create_iterator into art_init_iterator. --- include/roaring/art/art.h | 16 ++--- include/roaring/roaring64.h | 2 +- src/art/art.c | 131 ++++++++++++------------------------ src/roaring64.c | 105 ++++++++++------------------- tests/art_unit.cpp | 15 ++--- 5 files changed, 91 insertions(+), 178 deletions(-) diff --git a/include/roaring/art/art.h b/include/roaring/art/art.h index 28b91ac1a..f8a2375e7 100644 --- a/include/roaring/art/art.h +++ b/include/roaring/art/art.h @@ -33,7 +33,7 @@ namespace internal { #endif typedef uint8_t art_key_chunk_t; -typedef void art_node_t; +typedef struct art_node_s art_node_t; /** * Wrapper to allow an empty tree. @@ -50,7 +50,7 @@ typedef struct art_s { * contains the key corresponding to the value. */ typedef struct art_val_s { - art_key_chunk_t _pad[ART_KEY_BYTES]; + art_key_chunk_t key[ART_KEY_BYTES]; } art_val_t; /** @@ -121,15 +121,11 @@ typedef struct art_iterator_s { } art_iterator_t; /** - * Creates an iterator. Must be initialized through `art_init_iterator` first. - */ -art_iterator_t art_create_iterator(); - -/** - * Moves the iterator to the first or last entry in the ART, depending on - * `first`. + * Creates an iterator initialzed to the first or last entry in the ART, + * depending on `first`. The iterator is not valid if there are no entries in + * the ART. */ -void art_init_iterator(const art_t *art, art_iterator_t *iterator, bool first); +art_iterator_t art_init_iterator(const art_t *art, bool first); /** * Returns an initialized iterator positioned at a key equal to or greater than diff --git a/include/roaring/roaring64.h b/include/roaring/roaring64.h index cfa069b50..4656ca840 100644 --- a/include/roaring/roaring64.h +++ b/include/roaring/roaring64.h @@ -28,7 +28,7 @@ typedef struct roaring64_leaf_s roaring64_leaf_t; * contexts associated with that bitmap. */ typedef struct roaring64_bulk_context_s { - uint8_t high_bytes[ART_KEY_BYTES]; + uint8_t high_bytes[6]; uint16_t low_bytes; roaring64_leaf_t *leaf; } roaring64_bulk_context_t; diff --git a/src/art/art.c b/src/art/art.c index b349d0494..a72c8fdbc 100644 --- a/src/art/art.c +++ b/src/art/art.c @@ -26,9 +26,9 @@ // The only places that use SET_LEAF are locations where a field is directly // assigned to a leaf pointer. After using SET_LEAF, the leaf should be treated // as a node of unknown type. 
-#define IS_LEAF(p) (((uintptr_t)p & 1)) -#define SET_LEAF(p) ((art_node_t *)((uintptr_t)p | 1)) -#define CAST_LEAF(p) ((art_leaf_t *)((void *)((uintptr_t)p & ~1))) +#define IS_LEAF(p) (((uintptr_t)(p)&1)) +#define SET_LEAF(p) ((art_node_t *)((uintptr_t)(p) | 1)) +#define CAST_LEAF(p) ((art_leaf_t *)((void *)((uintptr_t)(p) & ~1))) #ifdef __cplusplus extern "C" { @@ -38,10 +38,9 @@ namespace internal { typedef uint8_t art_typecode_t; -// Should be kept in sync with art_val_t. -typedef struct art_leaf_s { - art_key_chunk_t key[ART_KEY_BYTES]; -} art_leaf_t; +// Aliasing with a "leaf" naming so that its purpose is clearer in the context +// of the trie internals. +typedef art_val_t art_leaf_t; // Inner node, with prefix. // @@ -90,7 +89,6 @@ typedef struct art_node256_s { // Helper struct to refer to a child within a node at a specific index. typedef struct art_indexed_child_s { art_node_t *child; - art_key_chunk_t key_chunk; uint8_t index; } art_indexed_child_t; @@ -138,8 +136,7 @@ static art_node_t *art_node256_insert(art_node256_t *node, art_node_t *child, static art_node4_t *art_node4_create(const art_key_chunk_t prefix[], uint8_t prefix_size) { art_node4_t *node = roaring_malloc(sizeof(art_node4_t)); - art_init_inner_node((art_inner_node_t *)node, ART_NODE4_TYPE, prefix, - prefix_size); + art_init_inner_node(&node->base, ART_NODE4_TYPE, prefix, prefix_size); node->count = 0; return node; } @@ -216,8 +213,7 @@ static inline art_node_t *art_node4_erase(art_node4_t *node, inner_node->prefix, inner_node->prefix_size); memcpy(inner_node->prefix, node->base.prefix, node->base.prefix_size); - *(inner_node->prefix + node->base.prefix_size) = - remaining_child_key; + inner_node->prefix[node->base.prefix_size] = remaining_child_key; inner_node->prefix_size += node->base.prefix_size + 1; } roaring_free(node); @@ -253,7 +249,6 @@ static inline art_indexed_child_t art_node4_next_child(const art_node4_t *node, return indexed_child; } indexed_child.index = index; - indexed_child.key_chunk = node->keys[index]; indexed_child.child = node->children[index]; return indexed_child; } @@ -270,7 +265,6 @@ static inline art_indexed_child_t art_node4_prev_child(const art_node4_t *node, return indexed_child; } indexed_child.index = index; - indexed_child.key_chunk = node->keys[index]; indexed_child.child = node->children[index]; return indexed_child; } @@ -283,7 +277,6 @@ static inline art_indexed_child_t art_node4_child_at(const art_node4_t *node, return indexed_child; } indexed_child.index = index; - indexed_child.key_chunk = node->keys[index]; indexed_child.child = node->children[index]; return indexed_child; } @@ -294,7 +287,6 @@ static inline art_indexed_child_t art_node4_lower_bound( for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] >= key_chunk) { indexed_child.index = i; - indexed_child.key_chunk = node->keys[i]; indexed_child.child = node->children[i]; return indexed_child; } @@ -306,8 +298,7 @@ static inline art_indexed_child_t art_node4_lower_bound( static art_node16_t *art_node16_create(const art_key_chunk_t prefix[], uint8_t prefix_size) { art_node16_t *node = roaring_malloc(sizeof(art_node16_t)); - art_init_inner_node((art_inner_node_t *)node, ART_NODE16_TYPE, prefix, - prefix_size); + art_init_inner_node(&node->base, ART_NODE16_TYPE, prefix, prefix_size); node->count = 0; return node; } @@ -407,7 +398,6 @@ static inline art_indexed_child_t art_node16_next_child( return indexed_child; } indexed_child.index = index; - indexed_child.key_chunk = node->keys[index]; indexed_child.child = 
node->children[index]; return indexed_child; } @@ -424,7 +414,6 @@ static inline art_indexed_child_t art_node16_prev_child( return indexed_child; } indexed_child.index = index; - indexed_child.key_chunk = node->keys[index]; indexed_child.child = node->children[index]; return indexed_child; } @@ -437,7 +426,6 @@ static inline art_indexed_child_t art_node16_child_at(const art_node16_t *node, return indexed_child; } indexed_child.index = index; - indexed_child.key_chunk = node->keys[index]; indexed_child.child = node->children[index]; return indexed_child; } @@ -448,7 +436,6 @@ static inline art_indexed_child_t art_node16_lower_bound( for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] >= key_chunk) { indexed_child.index = i; - indexed_child.key_chunk = node->keys[i]; indexed_child.child = node->children[i]; return indexed_child; } @@ -460,8 +447,7 @@ static inline art_indexed_child_t art_node16_lower_bound( static art_node48_t *art_node48_create(const art_key_chunk_t prefix[], uint8_t prefix_size) { art_node48_t *node = roaring_malloc(sizeof(art_node48_t)); - art_init_inner_node((art_inner_node_t *)node, ART_NODE48_TYPE, prefix, - prefix_size); + art_init_inner_node(&node->base, ART_NODE48_TYPE, prefix, prefix_size); node->count = 0; for (size_t i = 0; i < 256; ++i) { node->keys[i] = ART_NODE48_EMPTY_VAL; @@ -549,7 +535,6 @@ static inline art_indexed_child_t art_node48_next_child( for (size_t i = index; i < 256; ++i) { if (node->keys[i] != ART_NODE48_EMPTY_VAL) { indexed_child.child = node->children[node->keys[i]]; - indexed_child.key_chunk = node->keys[i]; indexed_child.index = i; return indexed_child; } @@ -568,7 +553,6 @@ static inline art_indexed_child_t art_node48_prev_child( for (int i = index; i > 0; --i) { if (node->keys[i] != ART_NODE48_EMPTY_VAL) { indexed_child.child = node->children[node->keys[i]]; - indexed_child.key_chunk = node->keys[i]; indexed_child.index = i; return indexed_child; } @@ -585,7 +569,6 @@ static inline art_indexed_child_t art_node48_child_at(const art_node48_t *node, return indexed_child; } indexed_child.index = index; - indexed_child.key_chunk = node->keys[index]; indexed_child.child = node->children[node->keys[index]]; return indexed_child; } @@ -596,7 +579,6 @@ static inline art_indexed_child_t art_node48_lower_bound( for (size_t i = key_chunk; i < 256; ++i) { if (node->keys[i] != ART_NODE48_EMPTY_VAL) { indexed_child.index = i; - indexed_child.key_chunk = node->keys[i]; indexed_child.child = node->children[node->keys[i]]; return indexed_child; } @@ -608,8 +590,7 @@ static inline art_indexed_child_t art_node48_lower_bound( static art_node256_t *art_node256_create(const art_key_chunk_t prefix[], uint8_t prefix_size) { art_node256_t *node = roaring_malloc(sizeof(art_node256_t)); - art_init_inner_node((art_inner_node_t *)node, ART_NODE256_TYPE, prefix, - prefix_size); + art_init_inner_node(&node->base, ART_NODE256_TYPE, prefix, prefix_size); node->count = 0; for (size_t i = 0; i < 256; ++i) { node->children[i] = NULL; @@ -670,7 +651,6 @@ static inline art_indexed_child_t art_node256_next_child( for (size_t i = index; i < 256; ++i) { if (node->children[i] != NULL) { indexed_child.child = node->children[i]; - indexed_child.key_chunk = i; indexed_child.index = i; return indexed_child; } @@ -689,7 +669,6 @@ static inline art_indexed_child_t art_node256_prev_child( for (int i = index; i > 0; --i) { if (node->children[i] != NULL) { indexed_child.child = node->children[i]; - indexed_child.key_chunk = i; indexed_child.index = i; return indexed_child; } @@ 
-706,7 +685,6 @@ static inline art_indexed_child_t art_node256_child_at( return indexed_child; } indexed_child.index = index; - indexed_child.key_chunk = index; indexed_child.child = node->children[index]; return indexed_child; } @@ -717,7 +695,6 @@ static inline art_indexed_child_t art_node256_lower_bound( for (size_t i = key_chunk; i < 256; ++i) { if (node->children[i] != NULL) { indexed_child.index = i; - indexed_child.key_chunk = i; indexed_child.child = node->children[i]; return indexed_child; } @@ -933,21 +910,16 @@ static art_indexed_child_t art_node_lower_bound(const art_node_t *node, // * Key range 1 == key range 2: 0 // * Key range 1 > key range 2: a positive value static inline int art_compare_prefix(const art_key_chunk_t key1[], - uint8_t key1_from, uint8_t key1_to, + uint8_t key1_from, const art_key_chunk_t key2[], - uint8_t key2_from, uint8_t key2_to) { - uint8_t min_len = key1_to - key1_from; - uint8_t key2_len = key2_to - key2_from; - if (key2_len < min_len) { - min_len = key2_len; - } - return memcmp(key1 + key1_from, key2 + key2_from, min_len); + uint8_t key2_from, uint8_t length) { + return memcmp(key1 + key1_from, key2 + key2_from, length); } // Compares two keys in full, see art_compare_prefix. int art_compare_keys(const art_key_chunk_t key1[], const art_key_chunk_t key2[]) { - return art_compare_prefix(key1, 0, ART_KEY_BYTES, key2, 0, ART_KEY_BYTES); + return art_compare_prefix(key1, 0, key2, 0, ART_KEY_BYTES); } // Returns the length of the common prefix between two key ranges. @@ -983,12 +955,10 @@ static art_node_t *art_insert_at(art_node_t *node, const art_key_chunk_t key[], art_node_t *new_node = (art_node_t *)art_node4_create(key + depth, common_prefix); - new_node = - art_node_insert_leaf((art_inner_node_t *)new_node, - *(leaf->key + depth + common_prefix), leaf); - new_node = - art_node_insert_leaf((art_inner_node_t *)new_node, - *(key + depth + common_prefix), new_leaf); + new_node = art_node_insert_leaf((art_inner_node_t *)new_node, + leaf->key[depth + common_prefix], leaf); + new_node = art_node_insert_leaf((art_inner_node_t *)new_node, + key[depth + common_prefix], new_leaf); // The new inner node is now the rootmost node. return new_node; @@ -1006,8 +976,7 @@ static art_node_t *art_insert_at(art_node_t *node, const art_key_chunk_t key[], // Make the existing internal node a child of the new internal node. node4 = (art_node4_t *)art_node4_insert( - node4, node, - *(art_key_chunk_t *)(inner_node->prefix + common_prefix)); + node4, node, inner_node->prefix[common_prefix]); // Correct the prefix of the moved internal node, trimming off the chunk // inserted into the new internal node. @@ -1019,13 +988,12 @@ static art_node_t *art_insert_at(art_node_t *node, const art_key_chunk_t key[], } // Insert the value in the new internal node. - return art_node_insert_leaf( - (art_inner_node_t *)node4, - *(art_key_chunk_t *)(key + common_prefix + depth), new_leaf); + return art_node_insert_leaf(&node4->base, key[common_prefix + depth], + new_leaf); } // Prefix matches entirely or node has no prefix. Look for an existing // child. - art_key_chunk_t key_chunk = *(key + depth + common_prefix); + art_key_chunk_t key_chunk = key[depth + common_prefix]; art_node_t *child = art_find_child(inner_node, key_chunk); if (child != NULL) { art_node_t *new_child = @@ -1076,7 +1044,7 @@ static art_erase_result_t art_erase_at(art_node_t *node, // Prefix mismatch. 
return result; } - art_key_chunk_t key_chunk = *(key + depth + common_prefix); + art_key_chunk_t key_chunk = key[depth + common_prefix]; art_node_t *child = art_find_child(inner_node, key_chunk); if (child == NULL) { // No child with key chunk. @@ -1114,9 +1082,8 @@ static art_val_t *art_find_at(const art_node_t *node, if (common_prefix != inner_node->prefix_size) { return NULL; } - art_node_t *child = art_find_child( - inner_node, - *(art_key_chunk_t *)(key + depth + inner_node->prefix_size)); + art_node_t *child = + art_find_child(inner_node, key[depth + inner_node->prefix_size]); if (child == NULL) { return NULL; } @@ -1221,7 +1188,7 @@ void art_node_printf(const art_node_t *node, uint8_t depth) { printf("%*s", depth, ""); printf("prefix: "); for (uint8_t i = 0; i < inner_node->prefix_size; ++i) { - printf("%x", (char)*(inner_node->prefix + i)); + printf("%x", (char)inner_node->prefix[i]); } printf("\n"); @@ -1444,12 +1411,12 @@ bool art_iterator_move(art_iterator_t *iterator, bool forward) { // the depth of the iterator. static bool art_node_iterator_lower_bound(const art_node_t *node, art_iterator_t *iterator, - const art_key_chunk_t *key) { + const art_key_chunk_t key[]) { while (!art_is_leaf(node)) { art_inner_node_t *inner_node = (art_inner_node_t *)node; - int prefix_comparison = art_compare_prefix( - inner_node->prefix, 0, inner_node->prefix_size, key, - iterator->depth, iterator->depth + inner_node->prefix_size); + int prefix_comparison = + art_compare_prefix(inner_node->prefix, 0, key, iterator->depth, + inner_node->prefix_size); if (prefix_comparison < 0) { // Prefix so far has been equal, but we've found a smaller key. // Since we take the lower bound within each node, we can return the @@ -1461,7 +1428,7 @@ static bool art_node_iterator_lower_bound(const art_node_t *node, } // Prefix is equal, move to lower bound child. art_key_chunk_t key_chunk = - *(key + iterator->depth + inner_node->prefix_size); + key[iterator->depth + inner_node->prefix_size]; art_indexed_child_t indexed_child = art_node_lower_bound(node, key_chunk); if (indexed_child.child == NULL) { @@ -1479,27 +1446,19 @@ static bool art_node_iterator_lower_bound(const art_node_t *node, art_leaf_t *leaf = CAST_LEAF(node); // Technically we don't have to re-compare the prefix if we arrived here // through the while loop, but it simplifies the code. 
- if (art_compare_prefix(leaf->key, 0, ART_KEY_BYTES, key, 0, - ART_KEY_BYTES) >= 0) { + if (art_compare_keys(leaf->key, key) >= 0) { return art_iterator_valid_loc(iterator, leaf); } return art_iterator_invalid_loc(iterator); } -art_iterator_t art_create_iterator() { - art_iterator_t iterator; - iterator.depth = 0; - iterator.frame = 0; - memset(iterator.key, 0, ART_KEY_BYTES); - iterator.value = NULL; - return iterator; -} - -void art_init_iterator(const art_t *art, art_iterator_t *iterator, bool first) { +art_iterator_t art_init_iterator(const art_t *art, bool first) { + art_iterator_t iterator = {0}; if (art->root == NULL) { - return; + return iterator; } - art_node_init_iterator(art->root, iterator, first); + art_node_init_iterator(art->root, &iterator, first); + return iterator; } bool art_iterator_next(art_iterator_t *iterator) { @@ -1512,8 +1471,7 @@ bool art_iterator_prev(art_iterator_t *iterator) { bool art_iterator_lower_bound(art_iterator_t *iterator, const art_key_chunk_t *key) { - int compare_result = art_compare_prefix(iterator->key, 0, ART_KEY_BYTES, - key, 0, ART_KEY_BYTES); + int compare_result = art_compare_keys(iterator->key, key); // Move up until we have an equal or greater prefix, after which we can do a // normal lower bound search. while (compare_result < 0 && iterator->frame > 0) { @@ -1524,8 +1482,8 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, } // Since we're only moving up, we can keep comparing against the // iterator key. - compare_result = art_compare_prefix(iterator->key, 0, iterator->depth, - key, 0, iterator->depth); + compare_result = + art_compare_prefix(iterator->key, 0, key, 0, iterator->depth); } if (compare_result > 0) { return art_node_init_iterator(art_iterator_node(iterator), iterator, @@ -1536,7 +1494,7 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, } art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key) { - art_iterator_t iterator = art_create_iterator(); + art_iterator_t iterator = {0}; if (art->root != NULL) { art_node_iterator_lower_bound(art->root, &iterator, key); } @@ -1544,11 +1502,10 @@ art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key) { } art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key) { - art_iterator_t iterator = art_create_iterator(); + art_iterator_t iterator = {0}; if (art->root != NULL) { if (art_node_iterator_lower_bound(art->root, &iterator, key) && - art_compare_prefix(iterator.key, 0, ART_KEY_BYTES, key, 0, - ART_KEY_BYTES) == 0) { + art_compare_keys(iterator.key, key) == 0) { art_iterator_next(&iterator); } } diff --git a/src/roaring64.c b/src/roaring64.c index bf02af4a2..d2bc9f164 100644 --- a/src/roaring64.c +++ b/src/roaring64.c @@ -120,8 +120,7 @@ roaring64_bitmap_t *roaring64_bitmap_create(void) { } void roaring64_bitmap_free(roaring64_bitmap_t *r) { - art_iterator_t it = art_create_iterator(); - art_init_iterator(&r->art, &it, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); while (it.value != NULL) { leaf_t *leaf = (leaf_t *)it.value; container_free(leaf->container, leaf->typecode); @@ -135,8 +134,7 @@ void roaring64_bitmap_free(roaring64_bitmap_t *r) { roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it = art_create_iterator(); - art_init_iterator(&r->art, &it, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); while (it.value != NULL) { leaf_t *leaf = 
(leaf_t *)it.value; uint8_t result_typecode = leaf->typecode; @@ -381,8 +379,7 @@ bool roaring64_bitmap_contains_bulk(const roaring64_bitmap_t *r, bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank, uint64_t *element) { - art_iterator_t it = art_create_iterator(); - art_init_iterator(&r->art, &it, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint64_t start_rank = 0; while (it.value != NULL) { leaf_t *leaf = (leaf_t *)it.value; @@ -409,8 +406,7 @@ uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val) { uint8_t high48[ART_KEY_BYTES]; uint16_t low16 = split_key(val, high48); - art_iterator_t it = art_create_iterator(); - art_init_iterator(&r->art, &it, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint64_t rank = 0; while (it.value != NULL) { leaf_t *leaf = (leaf_t *)it.value; @@ -433,8 +429,7 @@ bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, uint8_t high48[ART_KEY_BYTES]; uint16_t low16 = split_key(val, high48); - art_iterator_t it = art_create_iterator(); - art_init_iterator(&r->art, &it, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint64_t index = 0; while (it.value != NULL) { leaf_t *leaf = (leaf_t *)it.value; @@ -619,8 +614,7 @@ void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min, } uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r) { - art_iterator_t it = art_create_iterator(); - art_init_iterator(&r->art, &it, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); uint64_t cardinality = 0; while (it.value != NULL) { leaf_t *leaf = (leaf_t *)it.value; @@ -677,8 +671,7 @@ bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r) { } uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r) { - art_iterator_t it = art_create_iterator(); - art_init_iterator(&r->art, &it, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); if (it.value == NULL) { return UINT64_MAX; } @@ -688,8 +681,7 @@ uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r) { } uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r) { - art_iterator_t it = art_create_iterator(); - art_init_iterator(&r->art, &it, /*first=*/false); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/false); if (it.value == NULL) { return 0; } @@ -699,8 +691,7 @@ uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r) { } bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) { - art_iterator_t it = art_create_iterator(); - art_init_iterator(&r->art, &it, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); bool has_run_container = false; while (it.value != NULL) { leaf_t *leaf = (leaf_t *)it.value; @@ -718,8 +709,7 @@ bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) { size_t roaring64_bitmap_size_in_bytes(const roaring64_bitmap_t *r) { size_t size = art_size_in_bytes(&r->art); - art_iterator_t it = art_create_iterator(); - art_init_iterator(&r->art, &it, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); while (it.value != NULL) { leaf_t *leaf = (leaf_t *)it.value; size += sizeof(leaf_t); @@ -731,10 +721,8 @@ size_t roaring64_bitmap_size_in_bytes(const roaring64_bitmap_t *r) { bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { - art_iterator_t it1 = art_create_iterator(); - art_init_iterator(&r1->art, &it1, 
/*first=*/true); - art_iterator_t it2 = art_create_iterator(); - art_init_iterator(&r2->art, &it2, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { if (compare_high48(it1.key, it2.key) != 0) { @@ -754,10 +742,8 @@ bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { - art_iterator_t it1 = art_create_iterator(); - art_init_iterator(&r1->art, &it1, /*first=*/true); - art_iterator_t it2 = art_create_iterator(); - art_init_iterator(&r2->art, &it2, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL) { bool it2_present = it2.value != NULL; @@ -796,10 +782,8 @@ roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_create_iterator(); - art_init_iterator(&r1->art, &it1, /*first=*/true); - art_iterator_t it2 = art_create_iterator(); - art_init_iterator(&r2->art, &it2, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { // Cases: @@ -840,10 +824,8 @@ uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { uint64_t result = 0; - art_iterator_t it1 = art_create_iterator(); - art_init_iterator(&r1->art, &it1, /*first=*/true); - art_iterator_t it2 = art_create_iterator(); - art_init_iterator(&r2->art, &it2, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { // Cases: @@ -877,10 +859,8 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, if (r1 == r2) { return; } - art_iterator_t it1 = art_create_iterator(); - art_init_iterator(&r1->art, &it1, /*first=*/true); - art_iterator_t it2 = art_create_iterator(); - art_init_iterator(&r2->art, &it2, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL) { // Cases: @@ -950,10 +930,8 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { bool intersect = false; - art_iterator_t it1 = art_create_iterator(); - art_init_iterator(&r1->art, &it1, /*first=*/true); - art_iterator_t it2 = art_create_iterator(); - art_init_iterator(&r2->art, &it2, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { // Cases: @@ -992,10 +970,8 @@ roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_create_iterator(); - art_init_iterator(&r1->art, &it1, /*first=*/true); - art_iterator_t it2 = art_create_iterator(); - art_init_iterator(&r2->art, &it2, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + 
art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -1052,10 +1028,8 @@ void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, if (r1 == r2) { return; } - art_iterator_t it1 = art_create_iterator(); - art_init_iterator(&r1->art, &it1, /*first=*/true); - art_iterator_t it2 = art_create_iterator(); - art_init_iterator(&r2->art, &it2, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -1112,10 +1086,8 @@ roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_create_iterator(); - art_init_iterator(&r1->art, &it1, /*first=*/true); - art_iterator_t it2 = art_create_iterator(); - art_init_iterator(&r2->art, &it2, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -1177,10 +1149,8 @@ uint64_t roaring64_bitmap_xor_cardinality(const roaring64_bitmap_t *r1, void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { assert(r1 != r2); - art_iterator_t it1 = art_create_iterator(); - art_init_iterator(&r1->art, &it1, /*first=*/true); - art_iterator_t it2 = art_create_iterator(); - art_init_iterator(&r2->art, &it2, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -1256,10 +1226,8 @@ roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_create_iterator(); - art_init_iterator(&r1->art, &it1, /*first=*/true); - art_iterator_t it2 = art_create_iterator(); - art_init_iterator(&r2->art, &it2, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL) { // Cases: @@ -1315,10 +1283,8 @@ uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1, void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { - art_iterator_t it1 = art_create_iterator(); - art_init_iterator(&r1->art, &it1, /*first=*/true); - art_iterator_t it2 = art_create_iterator(); - art_init_iterator(&r2->art, &it2, /*first=*/true); + art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); while (it1.value != NULL) { // Cases: @@ -1378,8 +1344,7 @@ void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r, roaring_iterator64 iterator, void *ptr) { - art_iterator_t it = art_create_iterator(); - art_init_iterator(&r->art, &it, /*first=*/true); + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); while (it.value != NULL) { uint64_t high48 = combine_key(it.key, 0); uint64_t high32 = high48 & 0xFFFFFFFF00000000; diff --git a/tests/art_unit.cpp b/tests/art_unit.cpp index 
9abbedde3..ad9bf1cc7 100644 --- a/tests/art_unit.cpp +++ b/tests/art_unit.cpp @@ -210,8 +210,7 @@ DEFINE_TEST(test_art_iterator_next) { art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]); } - art_iterator_t iterator = art_create_iterator(); - art_init_iterator(&art, &iterator, true); + art_iterator_t iterator = art_init_iterator(&art, true); size_t i = 0; do { assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i]); @@ -231,8 +230,7 @@ DEFINE_TEST(test_art_iterator_prev) { art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]); } - art_iterator_t iterator = art_create_iterator(); - art_init_iterator(&art, &iterator, false); + art_iterator_t iterator = art_init_iterator(&art, false); size_t i = keys.size() - 1; do { assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i]); @@ -251,8 +249,7 @@ DEFINE_TEST(test_art_iterator_lower_bound) { art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]); } - art_iterator_t iterator = art_create_iterator(); - art_init_iterator(&art, &iterator, true); + art_iterator_t iterator = art_init_iterator(&art, true); assert_true(art_iterator_lower_bound(&iterator, (art_key_chunk_t*)keys[2])); assert_key_eq(iterator.key, (art_key_chunk_t*)keys[2]); const char* key = "000005"; @@ -344,8 +341,7 @@ DEFINE_TEST(test_art_iterator_erase) { for (size_t i = 0; i < keys.size(); ++i) { art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]); } - art_iterator_t iterator = art_create_iterator(); - art_init_iterator(&art, &iterator, true); + art_iterator_t iterator = art_init_iterator(&art, true); size_t i = 0; do { assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i]); @@ -365,8 +361,7 @@ DEFINE_TEST(test_art_iterator_insert) { std::vector values = {{1}, {2}, {3}, {4}, {5}}; art_t art{NULL}; art_insert(&art, (art_key_chunk_t*)keys[0], &values[0]); - art_iterator_t iterator = art_create_iterator(); - art_init_iterator(&art, &iterator, true); + art_iterator_t iterator = art_init_iterator(&art, true); for (size_t i = 1; i < keys.size(); ++i) { art_iterator_insert(&art, &iterator, (art_key_chunk_t*)keys[i], &values[i]); From 2fa99975d9e5b6d58e49476ea3344e7530812c57 Mon Sep 17 00:00:00 2001 From: Soerian Lieve Date: Thu, 4 Jan 2024 19:38:49 +0100 Subject: [PATCH 10/11] Cleanups to roaring64 --- include/roaring/roaring64.h | 1 - src/roaring64.c | 20 ++++++++++---------- tests/roaring64_unit.cpp | 1 + 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/roaring/roaring64.h b/include/roaring/roaring64.h index 4656ca840..e281bda53 100644 --- a/include/roaring/roaring64.h +++ b/include/roaring/roaring64.h @@ -29,7 +29,6 @@ typedef struct roaring64_leaf_s roaring64_leaf_t; */ typedef struct roaring64_bulk_context_s { uint8_t high_bytes[6]; - uint16_t low_bytes; roaring64_leaf_t *leaf; } roaring64_bulk_context_t; diff --git a/src/roaring64.c b/src/roaring64.c index d2bc9f164..d1c725a98 100644 --- a/src/roaring64.c +++ b/src/roaring64.c @@ -24,14 +24,13 @@ #else // CROARING_IS_BIG_ENDIAN // Gets compiled to bswap or equivalent on most compilers. 
-#define htobe64(x) \ - (((x & 0x00000000000000FFULL) << 56) | \ - ((x & 0x000000000000FF00ULL) << 40) | \ - ((x & 0x0000000000FF0000ULL) << 24) | \ - ((x & 0x00000000FF000000ULL) << 8) | \ - ((x & 0x000000FF00000000ULL) >> 8) | \ - ((x & 0x0000FF0000000000ULL) >> 24) | \ - ((x & 0x00FF000000000000ULL) >> 40) | \ +#define htobe64(x) \ + (((x & 0x00000000000000FFULL) << 56) | \ + ((x & 0x000000000000FF00ULL) << 40) | \ + ((x & 0x0000000000FF0000ULL) << 24) | \ + ((x & 0x00000000FF000000ULL) << 8) | ((x & 0x000000FF00000000ULL) >> 8) | \ + ((x & 0x0000FF0000000000ULL) >> 24) | \ + ((x & 0x00FF000000000000ULL) >> 40) | \ ((x & 0xFF00000000000000ULL) >> 56)) #endif // CROARING_IS_BIG_ENDIAN @@ -278,7 +277,6 @@ void roaring64_bitmap_add_bulk(roaring64_bitmap_t *r, // differ. context->leaf = containerptr_roaring64_bitmap_add(r, high48, low16, NULL); - context->low_bytes = low16; memcpy(context->high_bytes, high48, ART_KEY_BYTES); } } @@ -537,7 +535,6 @@ void roaring64_bitmap_remove_bulk(roaring64_bitmap_t *r, leaf_t *leaf = (leaf_t *)art_find(art, high48); context->leaf = containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); - context->low_bytes = low16; memcpy(context->high_bytes, high48, ART_KEY_BYTES); } } @@ -627,6 +624,9 @@ uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r) { uint64_t roaring64_bitmap_range_cardinality(const roaring64_bitmap_t *r, uint64_t min, uint64_t max) { + if (min >= max) { + return 0; + } max--; // A closed range is easier to work with. uint64_t cardinality = 0; diff --git a/tests/roaring64_unit.cpp b/tests/roaring64_unit.cpp index d48b2127b..9423ab252 100644 --- a/tests/roaring64_unit.cpp +++ b/tests/roaring64_unit.cpp @@ -384,6 +384,7 @@ DEFINE_TEST(test_range_cardinality) { roaring64_bitmap_add(r, 100002); roaring64_bitmap_add(r, 200000); + assert_true(roaring64_bitmap_range_cardinality(r, 0, 0) == 0); assert_true(roaring64_bitmap_range_cardinality(r, 0, 100000) == 1); assert_true(roaring64_bitmap_range_cardinality(r, 1, 100001) == 1); assert_true(roaring64_bitmap_range_cardinality(r, 0, 200001) == 5); From 764eaab73a2bed9deabc6820522864b8c7b84d5a Mon Sep 17 00:00:00 2001 From: Soerian Lieve Date: Mon, 8 Jan 2024 21:25:08 +0100 Subject: [PATCH 11/11] Remove unused "using namespace" --- include/roaring/roaring64.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/include/roaring/roaring64.h b/include/roaring/roaring64.h index e281bda53..2f491a470 100644 --- a/include/roaring/roaring64.h +++ b/include/roaring/roaring64.h @@ -388,19 +388,3 @@ bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r, #endif /* ROARING64_H */ -#ifdef __cplusplus -/** - * Best practices for C++ headers is to avoid polluting global scope. - * But for C compatibility when just `roaring.h` is included building as - * C++, default to global access for the C public API. - * - * BUT when `roaring.hh` is included instead, it sets this flag. That way - * explicit namespacing must be used to get the C functions. - * - * This is outside the include guard so that if you include BOTH headers, - * the order won't matter; you still get the global definitions. - */ -#if !defined(ROARING_API_NOT_IN_GLOBAL_NAMESPACE) -using namespace ::roaring::api; -#endif -#endif
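With the internal types hidden behind forward declarations and the unused namespace import removed, the 64-bit bitmap is consumed purely through the functions declared in roaring/roaring64.h. A short usage sketch of that surface; it assumes the header installs under roaring/ and that a fresh roaring64_bulk_context_t is zero-initialized before its first use, both inferred from the diffs above rather than guaranteed by them:

#include <assert.h>
#include <stdint.h>

#include <roaring/roaring64.h>

int main(void) {
    roaring64_bitmap_t *r = roaring64_bitmap_create();

    /* Plain single-value insertion. */
    roaring64_bitmap_add(r, 1);
    roaring64_bitmap_add(r, 20000);

    /* Bulk insertion: the context remembers the last leaf touched, so runs of
     * values sharing the same high 48 bits skip the ART lookup. */
    roaring64_bulk_context_t context = {0};
    for (uint64_t v = 10101000100001ULL; v < 10101000100100ULL; ++v) {
        roaring64_bitmap_add_bulk(r, &context, v);
    }

    assert(roaring64_bitmap_contains(r, 20000));
    /* 2 single adds + 99 bulk adds, all distinct. */
    assert(roaring64_bitmap_get_cardinality(r) == 101);

    roaring64_bitmap_free(r);
    return 0;
}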