diff --git a/include/roaring/containers/array.h b/include/roaring/containers/array.h index d06a5fe83..3070d6e33 100644 --- a/include/roaring/containers/array.h +++ b/include/roaring/containers/array.h @@ -158,6 +158,8 @@ void array_container_printf(const array_container_t *v); void array_container_printf_as_uint32_array(const array_container_t *v, uint32_t base); +bool array_container_validate(const array_container_t *v, const char **reason); + /** * Return the serialized size in bytes of a container having cardinality "card". */ diff --git a/include/roaring/containers/bitset.h b/include/roaring/containers/bitset.h index f71b7a960..a27e715ae 100644 --- a/include/roaring/containers/bitset.h +++ b/include/roaring/containers/bitset.h @@ -413,6 +413,8 @@ void bitset_container_printf(const bitset_container_t *v); void bitset_container_printf_as_uint32_array(const bitset_container_t *v, uint32_t base); +bool bitset_container_validate(const bitset_container_t *v, const char **reason); + /** * Return the serialized size in bytes of a container. */ diff --git a/include/roaring/containers/containers.h b/include/roaring/containers/containers.h index 3588fc49e..d011cc02e 100644 --- a/include/roaring/containers/containers.h +++ b/include/roaring/containers/containers.h @@ -435,6 +435,9 @@ void container_printf(const container_t *container, uint8_t typecode); void container_printf_as_uint32_array(const container_t *container, uint8_t typecode, uint32_t base); +bool container_internal_validate(const container_t *container, + uint8_t typecode, const char **reason); + /** * Checks whether a container is not empty, requires a typecode */ diff --git a/include/roaring/containers/run.h b/include/roaring/containers/run.h index 85deb5767..f24a579a3 100644 --- a/include/roaring/containers/run.h +++ b/include/roaring/containers/run.h @@ -435,6 +435,8 @@ void run_container_printf(const run_container_t *v); void run_container_printf_as_uint32_array(const run_container_t *v, uint32_t base); +bool run_container_validate(const run_container_t *run, const char **reason); + /** * Return the serialized size in bytes of a container having "num_runs" runs. */ diff --git a/include/roaring/portability.h b/include/roaring/portability.h index b787f1b15..162d49016 100644 --- a/include/roaring/portability.h +++ b/include/roaring/portability.h @@ -452,7 +452,7 @@ static inline bool croaring_refcount_dec(croaring_refcount_t *val) { return is_zero; } -static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) { +static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { return atomic_load_explicit(val, memory_order_relaxed); } #elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP @@ -472,7 +472,7 @@ static inline bool croaring_refcount_dec(croaring_refcount_t *val) { return is_zero; } -static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) { +static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { return val->load(std::memory_order_relaxed); } #elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C_WINDOWS @@ -492,7 +492,7 @@ static inline bool croaring_refcount_dec(croaring_refcount_t *val) { return _InterlockedDecrement(val) == 0; } -static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) { +static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { // Per https://learn.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access // > Simple reads and writes to properly-aligned 32-bit variables are atomic // > operations. In other words, you will not end up with only one portion @@ -513,7 +513,7 @@ static inline bool croaring_refcount_dec(croaring_refcount_t *val) { return val == 0; } -static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) { +static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { return *val; } #else diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h index fdf13823a..6bfcd859a 100644 --- a/include/roaring/roaring.h +++ b/include/roaring/roaring.h @@ -840,6 +840,16 @@ uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r); void roaring_bitmap_statistics(const roaring_bitmap_t *r, roaring_statistics_t *stat); +/** + * Perform internal consistency checks. Returns true if the bitmap is consistent. + * + * Note that some operations intentionally leave bitmaps in an inconsistent state temporarily, + * for example, `roaring_bitmap_lazy_*` functions, until `roaring_bitmap_repair_after_lazy` is called. + * + * If reason is non-null, it will be set to a string describing the first inconsistency found if any. + */ +bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, const char **reason); + /********************* * What follows is code use to iterate through values in a roaring bitmap diff --git a/src/containers/array.c b/src/containers/array.c index 6c0ffd2c7..e74b4f62e 100644 --- a/src/containers/array.c +++ b/src/containers/array.c @@ -444,6 +444,46 @@ void array_container_printf_as_uint32_array(const array_container_t *v, } } +/* + * Validate the container. Returns true if valid. + */ +bool array_container_validate(const array_container_t *v, const char **reason) { + if (v->capacity < 0) { + *reason = "negative capacity"; + return false; + } + if (v->cardinality < 0) { + *reason = "negative cardinality"; + return false; + } + if (v->cardinality > v->capacity) { + *reason = "cardinality exceeds capacity"; + return false; + } + if (v->cardinality > DEFAULT_MAX_SIZE) { + *reason = "cardinality exceeds DEFAULT_MAX_SIZE"; + return false; + } + if (v->cardinality == 0) { + return true; + } + + if (v->array == NULL) { + *reason = "NULL array pointer"; + return false; + } + uint16_t prev = v->array[0]; + for (int i = 1; i < v->cardinality; ++i) { + if (v->array[i] <= prev) { + *reason = "array elements not strictly increasing"; + return false; + } + prev = v->array[i]; + } + + return true; +} + /* Compute the number of runs */ int32_t array_container_number_of_runs(const array_container_t *ac) { // Can SIMD work here? diff --git a/src/containers/bitset.c b/src/containers/bitset.c index 456d807ce..722eda1e3 100644 --- a/src/containers/bitset.c +++ b/src/containers/bitset.c @@ -1000,6 +1000,26 @@ void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint3 } } +/* + * Validate the container. Returns true if valid. + */ +bool bitset_container_validate(const bitset_container_t *v, const char **reason) { + if (v->words == NULL) { + *reason = "words is NULL"; + return false; + } + if (v->cardinality != bitset_container_compute_cardinality(v)) { + *reason = "cardinality is incorrect"; + return false; + } + // Attempt to forcibly load the first and last words, hopefully causing + // a segfault or an address sanitizer error if words is not allocated. + volatile uint64_t *words = v->words; + (void) words[0]; + (void) words[BITSET_CONTAINER_SIZE_IN_WORDS - 1]; + return true; +} + // TODO: use the fast lower bound, also int bitset_container_number_of_runs(bitset_container_t *bc) { diff --git a/src/containers/containers.c b/src/containers/containers.c index 9bbd758c8..78a72db58 100644 --- a/src/containers/containers.c +++ b/src/containers/containers.c @@ -95,6 +95,43 @@ void container_printf_as_uint32_array( } } +bool container_internal_validate(const container_t *container, + uint8_t typecode, const char **reason) { + if (container == NULL) { + *reason = "container is NULL"; + return false; + } + // Not using container_unwrap_shared because it asserts if shared containers are nested + if (typecode == SHARED_CONTAINER_TYPE) { + const shared_container_t *shared_container = const_CAST_shared(container); + if (croaring_refcount_get(&shared_container->counter) == 0) { + *reason = "shared container has zero refcount"; + return false; + } + if (shared_container->typecode == SHARED_CONTAINER_TYPE) { + *reason = "shared container is nested"; + return false; + } + if (shared_container->container == NULL) { + *reason = "shared container has NULL container"; + return false; + } + container = shared_container->container; + typecode = shared_container->typecode; + } + switch (typecode) { + case BITSET_CONTAINER_TYPE: + return bitset_container_validate(const_CAST_bitset(container), reason); + case ARRAY_CONTAINER_TYPE: + return array_container_validate(const_CAST_array(container), reason); + case RUN_CONTAINER_TYPE: + return run_container_validate(const_CAST_run(container), reason); + default: + *reason = "invalid typecode"; + return false; + } +} + extern inline bool container_nonzero_cardinality( const container_t *c, uint8_t typecode); diff --git a/src/containers/run.c b/src/containers/run.c index 31203a64d..248432cb3 100644 --- a/src/containers/run.c +++ b/src/containers/run.c @@ -680,6 +680,54 @@ void run_container_printf_as_uint32_array(const run_container_t *cont, } } +/* + * Validate the container. Returns true if valid. + */ +bool run_container_validate(const run_container_t *run, const char **reason) { + if (run->n_runs < 0) { + *reason = "negative run count"; + return false; + } + if (run->capacity < 0) { + *reason = "negative run capacity"; + return false; + } + if (run->capacity < run->n_runs) { + *reason = "capacity less than run count"; + return false; + } + + if (run->n_runs == 0) { + return true; + } + if (run->runs == NULL) { + *reason = "NULL runs"; + return false; + } + + // Use uint32_t to avoid overflow issues on ranges that contain UINT16_MAX. + uint32_t last_end = 0; + for (int i = 0; i < run->n_runs; ++i) { + uint32_t start = run->runs[i].value; + uint32_t end = start + run->runs[i].length + 1; + if (end <= start) { + *reason = "run start + length overflow"; + return false; + } + + if (start < last_end) { + *reason = "run start less than last end"; + return false; + } + if (start == last_end && last_end != 0) { + *reason = "run start equal to last end, should have combined"; + return false; + } + last_end = end; + } + return true; +} + int32_t run_container_write(const run_container_t *container, char *buf) { uint16_t cast_16 = container->n_runs; memcpy(buf, &cast_16, sizeof(uint16_t)); diff --git a/src/roaring.c b/src/roaring.c index 1d8c5161d..843d0ae98 100644 --- a/src/roaring.c +++ b/src/roaring.c @@ -425,6 +425,76 @@ void roaring_bitmap_statistics(const roaring_bitmap_t *r, } } +/* + * Checks that: + * - Array containers are sorted and contain no duplicates + * - Range containers are sorted and contain no overlapping ranges + * - Roaring containers are sorted by key and there are no duplicate keys + * - The correct container type is use for each container (e.g. bitmaps aren't used for small containers) + */ +bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, const char **reason) { + const char *reason_local; + if (reason == NULL) { + // Always allow assigning through *reason + reason = &reason_local; + } + *reason = NULL; + const roaring_array_t *ra = &r->high_low_container; + if (ra->size < 0) { + *reason = "negative size"; + return false; + } + if (ra->allocation_size < 0) { + *reason = "negative allocation size"; + return false; + } + if (ra->size > ra->allocation_size) { + *reason = "more containers than allocated space"; + return false; + } + if (ra->flags & ~(ROARING_FLAG_COW | ROARING_FLAG_FROZEN)) { + *reason = "invalid flags"; + return false; + } + if (ra->size == 0) { + return true; + } + + if (ra->keys == NULL) { + *reason = "keys is NULL"; + return false; + } + if (ra->typecodes == NULL) { + *reason = "typecodes is NULL"; + return false; + } + if (ra->containers == NULL) { + *reason = "containers is NULL"; + return false; + } + + uint32_t prev_key = ra->keys[0]; + for (int32_t i = 1; i < ra->size; ++i) { + if (ra->keys[i] <= prev_key) { + *reason = "keys not strictly increasing"; + return false; + } + prev_key = ra->keys[i]; + } + + for (int32_t i = 0; i < ra->size; ++i) { + if (!container_internal_validate(ra->containers[i], ra->typecodes[i], reason)) { + // reason should already be set + if (*reason == NULL) { + *reason = "container failed to validate but no reason given"; + } + return false; + } + } + + return true; +} + roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) { roaring_bitmap_t *ans = (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t)); diff --git a/tests/test.h b/tests/test.h index f4d1fe4b0..a71d94c4e 100644 --- a/tests/test.h +++ b/tests/test.h @@ -27,6 +27,12 @@ #define DESCRIBE_TEST fprintf(stderr, "--- %s\n", __func__) +#define assert_bitmap_validate(b) do { \ + const char *internal_reason_buf = NULL; \ + if (!roaring_bitmap_internal_validate((b), &internal_reason_buf)) { \ + fail_msg("internal validation failed: %s", internal_reason_buf); \ + } \ + } while (0) // The "cmocka" test functions are supposed to look like: // diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c index 274a20bbb..4d50fc51d 100644 --- a/tests/toplevel_unit.c +++ b/tests/toplevel_unit.c @@ -692,11 +692,13 @@ void test_example(bool copy_on_write) { // create a new empty bitmap roaring_bitmap_t *r1 = roaring_bitmap_create(); roaring_bitmap_set_copy_on_write(r1, copy_on_write); + assert_bitmap_validate(r1); assert_non_null(r1); // then we can add values for (uint32_t i = 100; i < 1000; i++) { roaring_bitmap_add(r1, i); + assert_bitmap_validate(r1); } // check whether a value is contained @@ -710,6 +712,7 @@ void test_example(bool copy_on_write) { // run_optimize uint32_t size = roaring_bitmap_portable_size_in_bytes(r1); roaring_bitmap_run_optimize(r1); + assert_bitmap_validate(r1); uint32_t compact_size = roaring_bitmap_portable_size_in_bytes(r1); printf("size before run optimize %d bytes, and after %d bytes\n", size, @@ -717,6 +720,7 @@ void test_example(bool copy_on_write) { // create a new bitmap with varargs roaring_bitmap_t *r2 = roaring_bitmap_of(5, 1, 2, 3, 5, 6); + assert_bitmap_validate(r2); assert_non_null(r2); roaring_bitmap_printf(r2); @@ -725,6 +729,7 @@ void test_example(bool copy_on_write) { const uint32_t values[] = {2, 3, 4}; roaring_bitmap_t *r3 = roaring_bitmap_of_ptr(3, values); roaring_bitmap_set_copy_on_write(r3, copy_on_write); + assert_bitmap_validate(r3); // we can also go in reverse and go from arrays to bitmaps uint64_t card1 = roaring_bitmap_get_cardinality(r1); @@ -742,6 +747,7 @@ void test_example(bool copy_on_write) { roaring_bitmap_t *r1f = roaring_bitmap_of_ptr(card1, arr1); + assert_bitmap_validate(r1f); free(arr1); assert_non_null(r1f); @@ -752,12 +758,14 @@ void test_example(bool copy_on_write) { // we can copy and compare bitmaps roaring_bitmap_t *z = roaring_bitmap_copy(r3); roaring_bitmap_set_copy_on_write(z, copy_on_write); + assert_bitmap_validate(z); assert_true(roaring_bitmap_equals(r3, z)); roaring_bitmap_free(z); // we can compute union two-by-two roaring_bitmap_t *r1_2_3 = roaring_bitmap_or(r1, r2); + assert_bitmap_validate(r1_2_3); assert_true(roaring_bitmap_get_cardinality(r1_2_3) == roaring_bitmap_or_cardinality(r1, r2)); @@ -767,9 +775,11 @@ void test_example(bool copy_on_write) { // we can compute a big union const roaring_bitmap_t *allmybitmaps[] = {r1, r2, r3}; roaring_bitmap_t *bigunion = roaring_bitmap_or_many(3, allmybitmaps); + assert_bitmap_validate(bigunion); assert_true(roaring_bitmap_equals(r1_2_3, bigunion)); roaring_bitmap_t *bigunionheap = roaring_bitmap_or_many_heap(3, allmybitmaps); + assert_bitmap_validate(bigunionheap); assert_true(roaring_bitmap_equals(r1_2_3, bigunionheap)); roaring_bitmap_free(r1_2_3); roaring_bitmap_free(bigunion); @@ -778,11 +788,13 @@ void test_example(bool copy_on_write) { // we can compute xor two-by-two roaring_bitmap_t *rx1_2_3 = roaring_bitmap_xor(r1, r2); roaring_bitmap_set_copy_on_write(rx1_2_3, copy_on_write); + assert_bitmap_validate(rx1_2_3); roaring_bitmap_xor_inplace(rx1_2_3, r3); // we can compute a big xor const roaring_bitmap_t *allmybitmaps_x[] = {r1, r2, r3}; roaring_bitmap_t *bigxor = roaring_bitmap_xor_many(3, allmybitmaps_x); + assert_bitmap_validate(bigxor); assert_true(roaring_bitmap_equals(rx1_2_3, bigxor)); roaring_bitmap_free(rx1_2_3); @@ -790,6 +802,7 @@ void test_example(bool copy_on_write) { // we can compute intersection two-by-two roaring_bitmap_t *i1_2 = roaring_bitmap_and(r1, r2); + assert_bitmap_validate(i1_2); assert_true(roaring_bitmap_get_cardinality(i1_2) == roaring_bitmap_and_cardinality(r1, r2)); @@ -801,6 +814,7 @@ void test_example(bool copy_on_write) { size_t actualsize = roaring_bitmap_portable_serialize(r1, serializedbytes); assert_int_equal(actualsize, expectedsize); roaring_bitmap_t *t = roaring_bitmap_portable_deserialize(serializedbytes); + assert_bitmap_validate(t); assert_true(roaring_bitmap_equals(r1, t)); roaring_bitmap_free(t); // we can also check whether there is a bitmap at a memory location without reading it @@ -808,6 +822,7 @@ void test_example(bool copy_on_write) { assert_true(sizeofbitmap == expectedsize); // sizeofbitmap would be zero if no bitmap were found // we can also read the bitmap "safely" by specifying a byte size limit: t = roaring_bitmap_portable_deserialize_safe(serializedbytes,expectedsize); + assert_bitmap_validate(t); assert_true(roaring_bitmap_equals(r1, t)); // what we recover is equal roaring_bitmap_free(t); free(serializedbytes); @@ -872,6 +887,7 @@ void test_uint32_iterator(bool run) { roaring_bitmap_add(r1, i); } if(run) roaring_bitmap_run_optimize(r1); + assert_bitmap_validate(r1); roaring_uint32_iterator_t *iter = roaring_create_iterator(r1); for (uint32_t i = 0; i < 66000; i += 3) { assert_true(iter->has_value); @@ -1020,6 +1036,7 @@ DEFINE_TEST(test_addremove) { for (uint32_t value = 33057; value < 147849; value += 8) { roaring_bitmap_remove(bm, value); } + assert_bitmap_validate(bm); assert_true(roaring_bitmap_is_empty(bm)); roaring_bitmap_free(bm); } @@ -1033,6 +1050,7 @@ DEFINE_TEST(test_addremove_bulk) { for (uint32_t value = 33057; value < 147849; value += 8) { assert_true(roaring_bitmap_remove_checked(bm, value)); } + assert_bitmap_validate(bm); assert_true(roaring_bitmap_is_empty(bm)); roaring_bitmap_free(bm); } @@ -1046,6 +1064,7 @@ DEFINE_TEST(test_addremoverun) { for (uint32_t value = 33057; value < 147849; value += 8) { roaring_bitmap_remove(bm, value); } + assert_bitmap_validate(bm); assert_true(roaring_bitmap_is_empty(bm)); roaring_bitmap_free(bm); } @@ -1081,6 +1100,8 @@ bool check_bitmap_from_range(uint32_t min, uint64_t max, uint32_t step) { for (uint32_t value = min; value < max; value += step) { roaring_bitmap_add(expected, value); } + assert_bitmap_validate(result); + assert_bitmap_validate(expected); bool is_equal = roaring_bitmap_equals(expected, result); if (!is_equal) { fprintf(stderr, "[ERROR] check_bitmap_from_range(%u, %u, %u)\n", @@ -1096,6 +1117,8 @@ DEFINE_TEST(test_silly_range) { check_bitmap_from_range(0, 2, 1); roaring_bitmap_t *bm1 = roaring_bitmap_from_range(0, 1, 1); roaring_bitmap_t *bm2 = roaring_bitmap_from_range(0, 2, 1); + assert_bitmap_validate(bm1); + assert_bitmap_validate(bm2); assert_false(roaring_bitmap_equals(bm1, bm2)); roaring_bitmap_free(bm1); roaring_bitmap_free(bm2); @@ -1103,6 +1126,7 @@ DEFINE_TEST(test_silly_range) { DEFINE_TEST(test_adversarial_range) { roaring_bitmap_t *bm1 = roaring_bitmap_from_range(0, UINT64_C(0x100000000), 1); + assert_bitmap_validate(bm1); assert_true(roaring_bitmap_get_cardinality(bm1) == UINT64_C(0x100000000)); roaring_bitmap_free(bm1); } @@ -1152,6 +1176,7 @@ DEFINE_TEST(test_bitmap_from_range) { DEFINE_TEST(test_printf) { roaring_bitmap_t *r1 = roaring_bitmap_of(8, 1, 2, 3, 100, 1000, 10000, 1000000, 20000000); + assert_bitmap_validate(r1); assert_non_null(r1); roaring_bitmap_printf(r1); roaring_bitmap_free(r1); @@ -1178,6 +1203,7 @@ DEFINE_TEST(test_printf_withrun) { for (int i = 100, top_val = 200; i < top_val; i++) roaring_bitmap_add(r1, i); roaring_bitmap_run_optimize(r1); + assert_bitmap_validate(r1); roaring_bitmap_printf(r1); // does it crash? roaring_bitmap_free(r1); printf("\n");