Skip to content

Commit

Permalink
Add a function to do internal validations of the bitmap, intended for…
Browse files Browse the repository at this point in the history
… testing (#493)

* Add a "validate" function, which verifies a bitmap

Verfies that assumptions which should always be true continue to hold:
e.g. containers are in sorted order, array/run containers contain strictly
increasing numbers, bitsets precomputed cardinality is correct, etc.

* Avoid overflow when validating runs which include UINT16_MAX

* Check if size GT allocation size

* Start checking array after the first item

* Correct a grammar
  • Loading branch information
Dr-Emann authored Jul 4, 2023
1 parent cc34c6d commit 31067e3
Show file tree
Hide file tree
Showing 13 changed files with 270 additions and 4 deletions.
2 changes: 2 additions & 0 deletions include/roaring/containers/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ void array_container_printf(const array_container_t *v);
void array_container_printf_as_uint32_array(const array_container_t *v,
uint32_t base);

bool array_container_validate(const array_container_t *v, const char **reason);

/**
* Return the serialized size in bytes of a container having cardinality "card".
*/
Expand Down
2 changes: 2 additions & 0 deletions include/roaring/containers/bitset.h
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,8 @@ void bitset_container_printf(const bitset_container_t *v);
void bitset_container_printf_as_uint32_array(const bitset_container_t *v,
uint32_t base);

bool bitset_container_validate(const bitset_container_t *v, const char **reason);

/**
* Return the serialized size in bytes of a container.
*/
Expand Down
3 changes: 3 additions & 0 deletions include/roaring/containers/containers.h
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,9 @@ void container_printf(const container_t *container, uint8_t typecode);
void container_printf_as_uint32_array(const container_t *container,
uint8_t typecode, uint32_t base);

bool container_internal_validate(const container_t *container,
uint8_t typecode, const char **reason);

/**
* Checks whether a container is not empty, requires a typecode
*/
Expand Down
2 changes: 2 additions & 0 deletions include/roaring/containers/run.h
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,8 @@ void run_container_printf(const run_container_t *v);
void run_container_printf_as_uint32_array(const run_container_t *v,
uint32_t base);

bool run_container_validate(const run_container_t *run, const char **reason);

/**
* Return the serialized size in bytes of a container having "num_runs" runs.
*/
Expand Down
8 changes: 4 additions & 4 deletions include/roaring/portability.h
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
return is_zero;
}

static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) {
static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
return atomic_load_explicit(val, memory_order_relaxed);
}
#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP
Expand All @@ -472,7 +472,7 @@ static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
return is_zero;
}

static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) {
static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
return val->load(std::memory_order_relaxed);
}
#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C_WINDOWS
Expand All @@ -492,7 +492,7 @@ static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
return _InterlockedDecrement(val) == 0;
}

static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) {
static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
// Per https://learn.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access
// > Simple reads and writes to properly-aligned 32-bit variables are atomic
// > operations. In other words, you will not end up with only one portion
Expand All @@ -513,7 +513,7 @@ static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
return val == 0;
}

static inline uint32_t croaring_refcount_get(croaring_refcount_t *val) {
static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
return *val;
}
#else
Expand Down
10 changes: 10 additions & 0 deletions include/roaring/roaring.h
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,16 @@ uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r);
void roaring_bitmap_statistics(const roaring_bitmap_t *r,
roaring_statistics_t *stat);

/**
* Perform internal consistency checks. Returns true if the bitmap is consistent.
*
* Note that some operations intentionally leave bitmaps in an inconsistent state temporarily,
* for example, `roaring_bitmap_lazy_*` functions, until `roaring_bitmap_repair_after_lazy` is called.
*
* If reason is non-null, it will be set to a string describing the first inconsistency found if any.
*/
bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, const char **reason);

/*********************
* What follows is code use to iterate through values in a roaring bitmap
Expand Down
40 changes: 40 additions & 0 deletions src/containers/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,46 @@ void array_container_printf_as_uint32_array(const array_container_t *v,
}
}

/*
* Validate the container. Returns true if valid.
*/
bool array_container_validate(const array_container_t *v, const char **reason) {
if (v->capacity < 0) {
*reason = "negative capacity";
return false;
}
if (v->cardinality < 0) {
*reason = "negative cardinality";
return false;
}
if (v->cardinality > v->capacity) {
*reason = "cardinality exceeds capacity";
return false;
}
if (v->cardinality > DEFAULT_MAX_SIZE) {
*reason = "cardinality exceeds DEFAULT_MAX_SIZE";
return false;
}
if (v->cardinality == 0) {
return true;
}

if (v->array == NULL) {
*reason = "NULL array pointer";
return false;
}
uint16_t prev = v->array[0];
for (int i = 1; i < v->cardinality; ++i) {
if (v->array[i] <= prev) {
*reason = "array elements not strictly increasing";
return false;
}
prev = v->array[i];
}

return true;
}

/* Compute the number of runs */
int32_t array_container_number_of_runs(const array_container_t *ac) {
// Can SIMD work here?
Expand Down
20 changes: 20 additions & 0 deletions src/containers/bitset.c
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,26 @@ void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint3
}
}

/*
* Validate the container. Returns true if valid.
*/
bool bitset_container_validate(const bitset_container_t *v, const char **reason) {
if (v->words == NULL) {
*reason = "words is NULL";
return false;
}
if (v->cardinality != bitset_container_compute_cardinality(v)) {
*reason = "cardinality is incorrect";
return false;
}
// Attempt to forcibly load the first and last words, hopefully causing
// a segfault or an address sanitizer error if words is not allocated.
volatile uint64_t *words = v->words;
(void) words[0];
(void) words[BITSET_CONTAINER_SIZE_IN_WORDS - 1];
return true;
}


// TODO: use the fast lower bound, also
int bitset_container_number_of_runs(bitset_container_t *bc) {
Expand Down
37 changes: 37 additions & 0 deletions src/containers/containers.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,43 @@ void container_printf_as_uint32_array(
}
}

bool container_internal_validate(const container_t *container,
uint8_t typecode, const char **reason) {
if (container == NULL) {
*reason = "container is NULL";
return false;
}
// Not using container_unwrap_shared because it asserts if shared containers are nested
if (typecode == SHARED_CONTAINER_TYPE) {
const shared_container_t *shared_container = const_CAST_shared(container);
if (croaring_refcount_get(&shared_container->counter) == 0) {
*reason = "shared container has zero refcount";
return false;
}
if (shared_container->typecode == SHARED_CONTAINER_TYPE) {
*reason = "shared container is nested";
return false;
}
if (shared_container->container == NULL) {
*reason = "shared container has NULL container";
return false;
}
container = shared_container->container;
typecode = shared_container->typecode;
}
switch (typecode) {
case BITSET_CONTAINER_TYPE:
return bitset_container_validate(const_CAST_bitset(container), reason);
case ARRAY_CONTAINER_TYPE:
return array_container_validate(const_CAST_array(container), reason);
case RUN_CONTAINER_TYPE:
return run_container_validate(const_CAST_run(container), reason);
default:
*reason = "invalid typecode";
return false;
}
}

extern inline bool container_nonzero_cardinality(
const container_t *c, uint8_t typecode);

Expand Down
48 changes: 48 additions & 0 deletions src/containers/run.c
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,54 @@ void run_container_printf_as_uint32_array(const run_container_t *cont,
}
}

/*
* Validate the container. Returns true if valid.
*/
bool run_container_validate(const run_container_t *run, const char **reason) {
if (run->n_runs < 0) {
*reason = "negative run count";
return false;
}
if (run->capacity < 0) {
*reason = "negative run capacity";
return false;
}
if (run->capacity < run->n_runs) {
*reason = "capacity less than run count";
return false;
}

if (run->n_runs == 0) {
return true;
}
if (run->runs == NULL) {
*reason = "NULL runs";
return false;
}

// Use uint32_t to avoid overflow issues on ranges that contain UINT16_MAX.
uint32_t last_end = 0;
for (int i = 0; i < run->n_runs; ++i) {
uint32_t start = run->runs[i].value;
uint32_t end = start + run->runs[i].length + 1;
if (end <= start) {
*reason = "run start + length overflow";
return false;
}

if (start < last_end) {
*reason = "run start less than last end";
return false;
}
if (start == last_end && last_end != 0) {
*reason = "run start equal to last end, should have combined";
return false;
}
last_end = end;
}
return true;
}

int32_t run_container_write(const run_container_t *container, char *buf) {
uint16_t cast_16 = container->n_runs;
memcpy(buf, &cast_16, sizeof(uint16_t));
Expand Down
70 changes: 70 additions & 0 deletions src/roaring.c
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,76 @@ void roaring_bitmap_statistics(const roaring_bitmap_t *r,
}
}

/*
* Checks that:
* - Array containers are sorted and contain no duplicates
* - Range containers are sorted and contain no overlapping ranges
* - Roaring containers are sorted by key and there are no duplicate keys
* - The correct container type is use for each container (e.g. bitmaps aren't used for small containers)
*/
bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, const char **reason) {
const char *reason_local;
if (reason == NULL) {
// Always allow assigning through *reason
reason = &reason_local;
}
*reason = NULL;
const roaring_array_t *ra = &r->high_low_container;
if (ra->size < 0) {
*reason = "negative size";
return false;
}
if (ra->allocation_size < 0) {
*reason = "negative allocation size";
return false;
}
if (ra->size > ra->allocation_size) {
*reason = "more containers than allocated space";
return false;
}
if (ra->flags & ~(ROARING_FLAG_COW | ROARING_FLAG_FROZEN)) {
*reason = "invalid flags";
return false;
}
if (ra->size == 0) {
return true;
}

if (ra->keys == NULL) {
*reason = "keys is NULL";
return false;
}
if (ra->typecodes == NULL) {
*reason = "typecodes is NULL";
return false;
}
if (ra->containers == NULL) {
*reason = "containers is NULL";
return false;
}

uint32_t prev_key = ra->keys[0];
for (int32_t i = 1; i < ra->size; ++i) {
if (ra->keys[i] <= prev_key) {
*reason = "keys not strictly increasing";
return false;
}
prev_key = ra->keys[i];
}

for (int32_t i = 0; i < ra->size; ++i) {
if (!container_internal_validate(ra->containers[i], ra->typecodes[i], reason)) {
// reason should already be set
if (*reason == NULL) {
*reason = "container failed to validate but no reason given";
}
return false;
}
}

return true;
}

roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) {
roaring_bitmap_t *ans =
(roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
Expand Down
6 changes: 6 additions & 0 deletions tests/test.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@

#define DESCRIBE_TEST fprintf(stderr, "--- %s\n", __func__)

#define assert_bitmap_validate(b) do { \
const char *internal_reason_buf = NULL; \
if (!roaring_bitmap_internal_validate((b), &internal_reason_buf)) { \
fail_msg("internal validation failed: %s", internal_reason_buf); \
} \
} while (0)

// The "cmocka" test functions are supposed to look like:
//
Expand Down
Loading

0 comments on commit 31067e3

Please sign in to comment.