diff --git a/include/roaring/containers/containers.h b/include/roaring/containers/containers.h index 803e09fef..da2c2412b 100644 --- a/include/roaring/containers/containers.h +++ b/include/roaring/containers/containers.h @@ -2521,6 +2521,59 @@ static inline container_t *container_remove_range( } } +#ifdef __cplusplus +using api::roaring_container_iterator_t; +#endif + +/** + * Initializes the iterator at the first entry in the container. + */ +roaring_container_iterator_t container_init_iterator(const container_t *c, + uint8_t typecode, + uint16_t *value); + +/** + * Initializes the iterator at the last entry in the container. + */ +roaring_container_iterator_t container_init_iterator_last(const container_t *c, + uint8_t typecode, + uint16_t *value); + +/** + * Moves the iterator to the next entry. Returns true and sets `value` if a + * value is present. + */ +bool container_iterator_next(const container_t *c, uint8_t typecode, + roaring_container_iterator_t *it, uint16_t *value); + +/** + * Moves the iterator to the previous entry. Returns true and sets `value` if a + * value is present. + */ +bool container_iterator_prev(const container_t *c, uint8_t typecode, + roaring_container_iterator_t *it, uint16_t *value); + +/** + * Moves the iterator to the smallest entry that is greater than or equal to + * `val`. Returns true and sets `value_out` if a value is present. `value_out` + * should be initialized to a value. + */ +bool container_iterator_lower_bound(const container_t *c, uint8_t typecode, + roaring_container_iterator_t *it, + uint16_t *value_out, uint16_t val); + +/** + * Reads up to `count` entries from the container, and writes them into `buf` + * as `high16 | entry`. Returns true and sets `value_out` if a value is present + * after reading the entries. Sets `consumed` to the number of values read. + * `count` should be greater than zero. 
+ */ +bool container_iterator_read_into_uint32(const container_t *c, uint8_t typecode, + roaring_container_iterator_t *it, + uint32_t high16, uint32_t *buf, + uint32_t count, uint32_t *consumed, + uint16_t *value_out); + #ifdef __cplusplus } } } // extern "C" { namespace roaring { namespace internal { #endif diff --git a/include/roaring/roaring.h b/include/roaring/roaring.h index d32a08b24..6d9bcd9bf 100644 --- a/include/roaring/roaring.h +++ b/include/roaring/roaring.h @@ -879,25 +879,21 @@ Obviously, if you modify the underlying bitmap, the iterator becomes invalid. So don't. */ +/** + * A struct used to keep iterator state. Users should only access + * `current_value` and `has_value`, the rest of the type should be treated as + * opaque. + */ typedef struct roaring_uint32_iterator_s { - const roaring_bitmap_t *parent; // owner - int32_t container_index; // point to the current container index - int32_t in_container_index; // for bitset and array container, this is out - // index - int32_t run_index; // for run container, this points at the run + const roaring_bitmap_t *parent; // Owner + const ROARING_CONTAINER_T *container; // Current container + uint8_t typecode; // Typecode of current container + int32_t container_index; // Current container index + uint32_t highbits; // High 16 bits of the current value + roaring_container_iterator_t container_it; uint32_t current_value; bool has_value; - - const ROARING_CONTAINER_T - *container; // should be: - // parent->high_low_container.containers[container_index]; - uint8_t typecode; // should be: - // parent->high_low_container.typecodes[container_index]; - uint32_t highbits; // should be: - // parent->high_low_container.keys[container_index]) << - // 16; - } roaring_uint32_iterator_t; /** @@ -927,17 +923,25 @@ void roaring_init_iterator_last(const roaring_bitmap_t *r, roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r); /** -* Advance the iterator. 
If there is a new value, then `it->has_value` is true. -* The new value is in `it->current_value`. Values are traversed in increasing -* orders. For convenience, returns `it->has_value`. -*/ + * Advance the iterator. If there is a new value, then `it->has_value` is true. + * The new value is in `it->current_value`. Values are traversed in increasing + * order. For convenience, returns `it->has_value`. + * + * Once `it->has_value` is false, `roaring_advance_uint32_iterator` should not + * be called on the iterator again. Calling `roaring_previous_uint32_iterator` + * is allowed. + */ bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it); /** -* Decrement the iterator. If there's a new value, then `it->has_value` is true. -* The new value is in `it->current_value`. Values are traversed in decreasing -* order. For convenience, returns `it->has_value`. -*/ + * Decrement the iterator. If there's a new value, then `it->has_value` is true. + * The new value is in `it->current_value`. Values are traversed in decreasing + * order. For convenience, returns `it->has_value`. + * + * Once `it->has_value` is false, `roaring_previous_uint32_iterator` should not + * be called on the iterator again. Calling `roaring_advance_uint32_iterator` is + * allowed. + */ bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it); /** diff --git a/include/roaring/roaring_types.h b/include/roaring/roaring_types.h index 44000e511..7c007d151 100644 --- a/include/roaring/roaring_types.h +++ b/include/roaring/roaring_types.h @@ -97,6 +97,15 @@ typedef struct roaring_statistics_s { // and n_values_arrays, n_values_rle, n_values_bitmap } roaring_statistics_t; +/** + * Roaring-internal type used to iterate within a roaring container. + */ +typedef struct roaring_container_iterator_s { + // For bitset and array containers this is the index of the bit / entry. + // For run containers this points at the run. 
+ int32_t index; +} roaring_container_iterator_t; + #ifdef __cplusplus } } } // extern "C" { namespace roaring { namespace api { #endif diff --git a/src/containers/containers.c b/src/containers/containers.c index 78a72db58..2dc3b74d1 100644 --- a/src/containers/containers.c +++ b/src/containers/containers.c @@ -6,6 +6,10 @@ extern "C" { namespace roaring { namespace internal { #endif +static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) { + return (a < b) ? a : b; +} + extern inline const container_t *container_unwrap_shared( const container_t *candidate_shared_container, uint8_t *type); @@ -289,6 +293,335 @@ extern inline container_t *container_andnot( const container_t *c2, uint8_t type2, uint8_t *result_type); +roaring_container_iterator_t container_init_iterator(const container_t *c, + uint8_t typecode, + uint16_t *value) { + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + const bitset_container_t *bc = const_CAST_bitset(c); + uint32_t wordindex = 0; + uint64_t word; + while ((word = bc->words[wordindex]) == 0) { + wordindex++; + } + // word is non-zero + int32_t index = wordindex * 64 + roaring_trailing_zeroes(word); + *value = index; + return (roaring_container_iterator_t){ + .index = index, + }; + } + case ARRAY_CONTAINER_TYPE: { + const array_container_t *ac = const_CAST_array(c); + *value = ac->array[0]; + return (roaring_container_iterator_t){ + .index = 0, + }; + } + case RUN_CONTAINER_TYPE: { + const run_container_t *rc = const_CAST_run(c); + *value = rc->runs[0].value; + return (roaring_container_iterator_t){ + .index = 0, + }; + } + default: + assert(false); + roaring_unreachable; + return (roaring_container_iterator_t){0}; + } +} + +roaring_container_iterator_t container_init_iterator_last(const container_t *c, + uint8_t typecode, + uint16_t *value) { + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + const bitset_container_t *bc = const_CAST_bitset(c); + uint32_t wordindex = BITSET_CONTAINER_SIZE_IN_WORDS - 1; + uint64_t word; + 
while ((word = bc->words[wordindex]) == 0) { + wordindex--; + } + // word is non-zero + int32_t index = + wordindex * 64 + (63 - roaring_leading_zeroes(word)); + *value = index; + return (roaring_container_iterator_t){ + .index = index, + }; + } + case ARRAY_CONTAINER_TYPE: { + const array_container_t *ac = const_CAST_array(c); + int32_t index = ac->cardinality - 1; + *value = ac->array[index]; + return (roaring_container_iterator_t){ + .index = index, + }; + } + case RUN_CONTAINER_TYPE: { + const run_container_t *rc = const_CAST_run(c); + int32_t run_index = rc->n_runs - 1; + const rle16_t *last_run = &rc->runs[run_index]; + *value = last_run->value + last_run->length; + return (roaring_container_iterator_t){ + .index = run_index, + }; + } + default: + assert(false); + roaring_unreachable; + return (roaring_container_iterator_t){0}; + } +} + +bool container_iterator_next(const container_t *c, uint8_t typecode, + roaring_container_iterator_t *it, + uint16_t *value) { + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + const bitset_container_t *bc = const_CAST_bitset(c); + it->index++; + + uint32_t wordindex = it->index / 64; + if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) { + return false; + } + + uint64_t word = + bc->words[wordindex] & (UINT64_MAX << (it->index % 64)); + // next part could be optimized/simplified + while (word == 0 && + (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) { + wordindex++; + word = bc->words[wordindex]; + } + if (word != 0) { + it->index = wordindex * 64 + roaring_trailing_zeroes(word); + *value = it->index; + return true; + } + return false; + } + case ARRAY_CONTAINER_TYPE: { + const array_container_t *ac = const_CAST_array(c); + it->index++; + if (it->index < ac->cardinality) { + *value = ac->array[it->index]; + return true; + } + return false; + } + case RUN_CONTAINER_TYPE: { + if (*value == UINT16_MAX) { // Avoid overflow to zero + return false; + } + + const run_container_t *rc = const_CAST_run(c); + uint32_t limit = + 
rc->runs[it->index].value + rc->runs[it->index].length; + if (*value < limit) { + (*value)++; + return true; + } + + it->index++; + if (it->index < rc->n_runs) { + *value = rc->runs[it->index].value; + return true; + } + return false; + } + default: + assert(false); + roaring_unreachable; + return false; + } +} + +bool container_iterator_prev(const container_t *c, uint8_t typecode, + roaring_container_iterator_t *it, + uint16_t *value) { + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + if (--it->index < 0) { + return false; + } + + const bitset_container_t *bc = const_CAST_bitset(c); + int32_t wordindex = it->index / 64; + uint64_t word = + bc->words[wordindex] & (UINT64_MAX >> (63 - (it->index % 64))); + + while (word == 0 && --wordindex >= 0) { + word = bc->words[wordindex]; + } + if (word == 0) { + return false; + } + + it->index = (wordindex * 64) + (63 - roaring_leading_zeroes(word)); + *value = it->index; + return true; + } + case ARRAY_CONTAINER_TYPE: { + if (--it->index < 0) { + return false; + } + const array_container_t *ac = const_CAST_array(c); + *value = ac->array[it->index]; + return true; + } + case RUN_CONTAINER_TYPE: { + if (*value == 0) { + return false; + } + + const run_container_t *rc = const_CAST_run(c); + (*value)--; + if (*value >= rc->runs[it->index].value) { + return true; + } + + if (--it->index < 0) { + return false; + } + + *value = rc->runs[it->index].value + rc->runs[it->index].length; + return true; + } + default: + assert(false); + roaring_unreachable; + return false; + } +} + +bool container_iterator_lower_bound(const container_t *c, uint8_t typecode, + roaring_container_iterator_t *it, + uint16_t *value_out, uint16_t val) { + if (val > container_maximum(c, typecode)) { + return false; + } + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + const bitset_container_t *bc = const_CAST_bitset(c); + it->index = bitset_container_index_equalorlarger(bc, val); + *value_out = it->index; + return true; + } + case 
ARRAY_CONTAINER_TYPE: { + const array_container_t *ac = const_CAST_array(c); + it->index = array_container_index_equalorlarger(ac, val); + *value_out = ac->array[it->index]; + return true; + } + case RUN_CONTAINER_TYPE: { + const run_container_t *rc = const_CAST_run(c); + it->index = run_container_index_equalorlarger(rc, val); + if (rc->runs[it->index].value <= val) { + *value_out = val; + } else { + *value_out = rc->runs[it->index].value; + } + return true; + } + default: + assert(false); + roaring_unreachable; + return false; + } +} + +bool container_iterator_read_into_uint32(const container_t *c, uint8_t typecode, + roaring_container_iterator_t *it, + uint32_t high16, uint32_t *buf, + uint32_t count, uint32_t *consumed, + uint16_t *value_out) { + *consumed = 0; + if (count == 0) { + return false; + } + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + const bitset_container_t *bc = const_CAST_bitset(c); + uint32_t wordindex = it->index / 64; + uint64_t word = + bc->words[wordindex] & (UINT64_MAX << (it->index % 64)); + do { + // Read set bits. + while (word != 0 && *consumed < count) { + *buf = high16 | + (wordindex * 64 + roaring_trailing_zeroes(word)); + word = word & (word - 1); + buf++; + (*consumed)++; + } + // Skip unset bits. 
+ while (word == 0 && + wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS) { + wordindex++; + word = bc->words[wordindex]; + } + } while (word != 0 && *consumed < count); + + if (word != 0) { + it->index = wordindex * 64 + roaring_trailing_zeroes(word); + *value_out = it->index; + return true; + } + return false; + } + case ARRAY_CONTAINER_TYPE: { + const array_container_t *ac = const_CAST_array(c); + uint32_t num_values = + minimum_uint32(ac->cardinality - it->index, count); + for (uint32_t i = 0; i < num_values; i++) { + buf[i] = high16 | ac->array[it->index + i]; + } + *consumed += num_values; + it->index += num_values; + if (it->index < ac->cardinality) { + *value_out = ac->array[it->index]; + return true; + } + return false; + } + case RUN_CONTAINER_TYPE: { + const run_container_t *rc = const_CAST_run(c); + do { + uint32_t largest_run_value = + rc->runs[it->index].value + rc->runs[it->index].length; + uint32_t num_values = minimum_uint32( + largest_run_value - *value_out + 1, count - *consumed); + for (uint32_t i = 0; i < num_values; i++) { + buf[i] = high16 | (*value_out + i); + } + *value_out += num_values; + buf += num_values; + *consumed += num_values; + + // `*value_out` is a uint16_t, so `*value_out += num_values` wraps + // around to 0 when the run ends at UINT16_MAX and every remaining + // value of that run was consumed; treat that as "run exhausted". + if (*value_out > largest_run_value || *value_out == 0) { + it->index++; + if (it->index < rc->n_runs) { + *value_out = rc->runs[it->index].value; + } else { + return false; + } + } + } while (*consumed < count); + return true; + } + default: + assert(false); + roaring_unreachable; + return false; + } +} + #ifdef __cplusplus } } } // extern "C" { namespace roaring { namespace internal { #endif diff --git a/src/roaring.c b/src/roaring.c index 688ae9991..c7c1e5155 100644 --- a/src/roaring.c +++ b/src/roaring.c @@ -200,10 +200,6 @@ roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) 
{ return answer; } -static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) { - return (a < b) ? a : b; -} - static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) { return (a < b) ? a : b; } @@ -1636,145 +1632,80 @@ bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator, * begin roaring_uint32_iterator_t *****/ -// Partially initializes the roaring iterator when it begins looking at -// a new container. +/** + * Partially initializes the iterator. Leaves it in either state: + * 1. Invalid due to `has_value = false`, or + * 2. At a container, with the high bits set, `has_value = true`. + */ static bool iter_new_container_partial_init(roaring_uint32_iterator_t *newit) { - newit->in_container_index = 0; - newit->run_index = 0; newit->current_value = 0; if (newit->container_index >= newit->parent->high_low_container.size || newit->container_index < 0) { newit->current_value = UINT32_MAX; return (newit->has_value = false); } - // assume not empty newit->has_value = true; // we precompute container, typecode and highbits so that successive // iterators do not have to grab them from odd memory locations // and have to worry about the (easily predicted) container_unwrap_shared // call. 
newit->container = - newit->parent->high_low_container.containers[newit->container_index]; + newit->parent->high_low_container.containers[newit->container_index]; newit->typecode = - newit->parent->high_low_container.typecodes[newit->container_index]; + newit->parent->high_low_container.typecodes[newit->container_index]; newit->highbits = - ((uint32_t) - newit->parent->high_low_container.keys[newit->container_index]) - << 16; + ((uint32_t) + newit->parent->high_low_container.keys[newit->container_index]) + << 16; newit->container = - container_unwrap_shared(newit->container, &(newit->typecode)); - return newit->has_value; -} - -static bool loadfirstvalue(roaring_uint32_iterator_t *newit) { - if (!iter_new_container_partial_init(newit)) - return newit->has_value; - - switch (newit->typecode) { - case BITSET_CONTAINER_TYPE: { - const bitset_container_t *bc = const_CAST_bitset(newit->container); - - uint32_t wordindex = 0; - uint64_t word; - while ((word = bc->words[wordindex]) == 0) { - wordindex++; // advance - } - // here "word" is non-zero - newit->in_container_index = wordindex * 64 + roaring_trailing_zeroes(word); - newit->current_value = newit->highbits | newit->in_container_index; - break; } - - case ARRAY_CONTAINER_TYPE: { - const array_container_t *ac = const_CAST_array(newit->container); - newit->current_value = newit->highbits | ac->array[0]; - break; } - - case RUN_CONTAINER_TYPE: { - const run_container_t *rc = const_CAST_run(newit->container); - newit->current_value = newit->highbits | rc->runs[0].value; - break; } - - default: - // if this ever happens, bug! 
- assert(false); - } // switch (typecode) + container_unwrap_shared(newit->container, &(newit->typecode)); return true; } -static bool loadlastvalue(roaring_uint32_iterator_t* newit) { - if (!iter_new_container_partial_init(newit)) - return newit->has_value; - - switch(newit->typecode) { - case BITSET_CONTAINER_TYPE: { - uint32_t wordindex = BITSET_CONTAINER_SIZE_IN_WORDS - 1; - uint64_t word; - const bitset_container_t* bitset_container = (const bitset_container_t*)newit->container; - while ((word = bitset_container->words[wordindex]) == 0) - --wordindex; - - int num_leading_zeros = roaring_leading_zeroes(word); - newit->in_container_index = (wordindex * 64) + (63 - num_leading_zeros); - newit->current_value = newit->highbits | newit->in_container_index; - break; - } - case ARRAY_CONTAINER_TYPE: { - const array_container_t* array_container = (const array_container_t*)newit->container; - newit->in_container_index = array_container->cardinality - 1; - newit->current_value = newit->highbits | array_container->array[newit->in_container_index]; - break; - } - case RUN_CONTAINER_TYPE: { - const run_container_t* run_container = (const run_container_t*)newit->container; - newit->run_index = run_container->n_runs - 1; - const rle16_t* last_run = &run_container->runs[newit->run_index]; - newit->current_value = newit->highbits | (last_run->value + last_run->length); - break; - } - default: - // if this ever happens, bug! - assert(false); +/** + * Positions the iterator at the first value of the current container that the + * iterator points at, if available. 
+ */ +static bool loadfirstvalue(roaring_uint32_iterator_t *newit) { + if (iter_new_container_partial_init(newit)) { + uint16_t value = 0; + newit->container_it = + container_init_iterator(newit->container, newit->typecode, &value); + newit->current_value = newit->highbits | value; } - return true; + return newit->has_value; } -// prerequesite: the value should be in range of the container -static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) { - // Don't have to check return value because of prerequisite - iter_new_container_partial_init(newit); - uint16_t lb = val & 0xFFFF; - - switch (newit->typecode) { - case BITSET_CONTAINER_TYPE: { - const bitset_container_t *bc = const_CAST_bitset(newit->container); - newit->in_container_index = - bitset_container_index_equalorlarger(bc, lb); - newit->current_value = newit->highbits | newit->in_container_index; - break; } - - case ARRAY_CONTAINER_TYPE: { - const array_container_t *ac = const_CAST_array(newit->container); - newit->in_container_index = - array_container_index_equalorlarger(ac, lb); - newit->current_value = - newit->highbits | ac->array[newit->in_container_index]; - break; } - - case RUN_CONTAINER_TYPE: { - const run_container_t *rc = const_CAST_run(newit->container); - newit->run_index = run_container_index_equalorlarger(rc, lb); - if (rc->runs[newit->run_index].value <= lb) { - newit->current_value = val; - } else { - newit->current_value = - newit->highbits | rc->runs[newit->run_index].value; - } - break; } - - default: - roaring_unreachable; +/** + * Positions the iterator at the last value of the current container that the + * iterator points at, if available. 
+ */ +static bool loadlastvalue(roaring_uint32_iterator_t *newit) { + if (iter_new_container_partial_init(newit)) { + uint16_t value = 0; + newit->container_it = + container_init_iterator_last(newit->container, newit->typecode, &value); + newit->current_value = newit->highbits | value; } + return newit->has_value; +} +/** + * Positions the iterator at the smallest value that is larger than or equal to + * `val` within the current container that the iterator points at. Callers must + * ensure such a value exists within the current container: a violation trips + * the asserts in debug builds and makes release builds return false. + */ +static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, + uint32_t val) { + bool partial_init = iter_new_container_partial_init(newit); + assert(partial_init); + if (!partial_init) { + // `assert` compiles away under NDEBUG: stop before using a container + // and typecode that were never loaded (`has_value` is already false). + return false; + } + uint16_t value = 0; + newit->container_it = + container_init_iterator(newit->container, newit->typecode, &value); + bool found = container_iterator_lower_bound( + newit->container, newit->typecode, &newit->container_it, &value, val & 0xFFFF); + assert(found); + if (!found) { + // Nothing >= `val` in this container: report "no value" explicitly. + return (newit->has_value = false); + } + newit->current_value = newit->highbits | value; return true; } @@ -1808,28 +1739,32 @@ roaring_uint32_iterator_t *roaring_copy_uint32_iterator( return newit; } -bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) { +bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, + uint32_t val) { uint16_t hb = val >> 16; - const int i = ra_get_index(& it->parent->high_low_container, hb); + const int i = ra_get_index(&it->parent->high_low_container, hb); if (i >= 0) { - uint32_t lowvalue = container_maximum(it->parent->high_low_container.containers[i], it->parent->high_low_container.typecodes[i]); - uint16_t lb = val & 0xFFFF; - if(lowvalue < lb ) { - it->container_index = i+1; // will have to load first value of next container - } else {// the value is necessarily within the range of the container - it->container_index = i; - it->has_value = loadfirstvalue_largeorequal(it, val); - return it->has_value; - } + uint32_t lowvalue = 
container_maximum(it->parent->high_low_container.containers[i], + it->parent->high_low_container.typecodes[i]); + uint16_t lb = val & 0xFFFF; + if (lowvalue < lb) { + // will have to load first value of next container + it->container_index = i + 1; + } else { + // the value is necessarily within the range of the container + it->container_index = i; + it->has_value = loadfirstvalue_largeorequal(it, val); + return it->has_value; + } } else { - // there is no matching, so we are going for the next container - it->container_index = -i-1; + // there is no matching, so we are going for the next container + it->container_index = -i - 1; } it->has_value = loadfirstvalue(it); return it->has_value; } - bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) { if (it->container_index >= it->parent->high_low_container.size) { return (it->has_value = false); @@ -1838,65 +1773,12 @@ bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) { it->container_index = 0; return (it->has_value = loadfirstvalue(it)); } - - switch (it->typecode) { - case BITSET_CONTAINER_TYPE: { - const bitset_container_t *bc = const_CAST_bitset(it->container); - it->in_container_index++; - - uint32_t wordindex = it->in_container_index / 64; - if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break; - - uint64_t word = bc->words[wordindex] & - (UINT64_MAX << (it->in_container_index % 64)); - // next part could be optimized/simplified - while ((word == 0) && - (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) { - wordindex++; - word = bc->words[wordindex]; - } - if (word != 0) { - it->in_container_index = wordindex * 64 + roaring_trailing_zeroes(word); - it->current_value = it->highbits | it->in_container_index; - return (it->has_value = true); - } - break; } - - case ARRAY_CONTAINER_TYPE: { - const array_container_t *ac = const_CAST_array(it->container); - it->in_container_index++; - if (it->in_container_index < ac->cardinality) { - it->current_value = - it->highbits | 
ac->array[it->in_container_index]; - return (it->has_value = true); - } - break; } - - case RUN_CONTAINER_TYPE: { - if(it->current_value == UINT32_MAX) { // avoid overflow to zero - return (it->has_value = false); - } - - const run_container_t* rc = const_CAST_run(it->container); - uint32_t limit = (it->highbits | (rc->runs[it->run_index].value + - rc->runs[it->run_index].length)); - if (++it->current_value <= limit) { - return (it->has_value = true); - } - - if (++it->run_index < rc->n_runs) { // Assume the run has a value - it->current_value = - it->highbits | rc->runs[it->run_index].value; - return (it->has_value = true); - } - break; - } - - default: - roaring_unreachable; + uint16_t low16 = (uint16_t)it->current_value; + if (container_iterator_next(it->container, it->typecode, + &it->container_it, &low16)) { + it->current_value = it->highbits | low16; + return (it->has_value = true); } - - // moving to next container it->container_index++; return (it->has_value = loadfirstvalue(it)); } @@ -1909,147 +1791,42 @@ bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it) { it->container_index = it->parent->high_low_container.size - 1; return (it->has_value = loadlastvalue(it)); } - - switch (it->typecode) { - case BITSET_CONTAINER_TYPE: { - if (--it->in_container_index < 0) - break; - - const bitset_container_t* bitset_container = (const bitset_container_t*)it->container; - int32_t wordindex = it->in_container_index / 64; - uint64_t word = bitset_container->words[wordindex] & (UINT64_MAX >> (63 - (it->in_container_index % 64))); - - while (word == 0 && --wordindex >= 0) { - word = bitset_container->words[wordindex]; - } - if (word == 0) - break; - - int num_leading_zeros = roaring_leading_zeroes(word); - it->in_container_index = (wordindex * 64) + (63 - num_leading_zeros); - it->current_value = it->highbits | it->in_container_index; - return (it->has_value = true); - } - case ARRAY_CONTAINER_TYPE: { - if (--it->in_container_index < 0) - break; - - const 
array_container_t* array_container = (const array_container_t*)it->container; - it->current_value = it->highbits | array_container->array[it->in_container_index]; - return (it->has_value = true); - } - case RUN_CONTAINER_TYPE: { - if(it->current_value == 0) - return (it->has_value = false); - - const run_container_t* run_container = (const run_container_t*)it->container; - if (--it->current_value >= (it->highbits | run_container->runs[it->run_index].value)) { - return (it->has_value = true); - } - - if (--it->run_index < 0) - break; - - it->current_value = it->highbits | (run_container->runs[it->run_index].value + - run_container->runs[it->run_index].length); - return (it->has_value = true); - } - default: - // if this ever happens, bug! - assert(false); - } // switch (typecode) - - // moving to previous container + uint16_t low16 = (uint16_t)it->current_value; + if (container_iterator_prev(it->container, it->typecode, + &it->container_it, &low16)) { + it->current_value = it->highbits | low16; + return (it->has_value = true); + } it->container_index--; return (it->has_value = loadlastvalue(it)); } -uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) { - uint32_t ret = 0; - uint32_t num_values; - uint32_t wordindex; // used for bitsets - uint64_t word; // used for bitsets - const array_container_t* acont; //TODO remove - const run_container_t* rcont; //TODO remove - const bitset_container_t* bcont; //TODO remove - - while (it->has_value && ret < count) { - switch (it->typecode) { - case BITSET_CONTAINER_TYPE: - bcont = const_CAST_bitset(it->container); - wordindex = it->in_container_index / 64; - word = bcont->words[wordindex] & (UINT64_MAX << (it->in_container_index % 64)); - do { - while (word != 0 && ret < count) { - buf[0] = it->highbits | (wordindex * 64 + roaring_trailing_zeroes(word)); - word = word & (word - 1); - buf++; - ret++; - } - while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) { - 
wordindex++; - word = bcont->words[wordindex]; - } - } while (word != 0 && ret < count); - it->has_value = (word != 0); - if (it->has_value) { - it->in_container_index = wordindex * 64 + roaring_trailing_zeroes(word); - it->current_value = it->highbits | it->in_container_index; - } - break; - case ARRAY_CONTAINER_TYPE: - acont = const_CAST_array(it->container); - num_values = minimum_uint32(acont->cardinality - it->in_container_index, count - ret); - for (uint32_t i = 0; i < num_values; i++) { - buf[i] = it->highbits | acont->array[it->in_container_index + i]; - } - buf += num_values; - ret += num_values; - it->in_container_index += num_values; - it->has_value = (it->in_container_index < acont->cardinality); - if (it->has_value) { - it->current_value = it->highbits | acont->array[it->in_container_index]; - } - break; - case RUN_CONTAINER_TYPE: - rcont = const_CAST_run(it->container); - //"in_run_index" name is misleading, read it as "max_value_in_current_run" - do { - uint32_t largest_run_value = it->highbits | (rcont->runs[it->run_index].value + rcont->runs[it->run_index].length); - num_values = minimum_uint32(largest_run_value - it->current_value + 1, count - ret); - for (uint32_t i = 0; i < num_values; i++) { - buf[i] = it->current_value + i; - } - it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0 - buf += num_values; - ret += num_values; - - if (it->current_value > largest_run_value || it->current_value == 0) { - it->run_index++; - if (it->run_index < rcont->n_runs) { - it->current_value = it->highbits | rcont->runs[it->run_index].value; - } else { - it->has_value = false; - } - } - } while ((ret < count) && it->has_value); - break; - default: - assert(false); - } - if (it->has_value) { - assert(ret == count); - return ret; +uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, + uint32_t *buf, uint32_t count) { + uint32_t ret = 0; + while (it->has_value && ret < count) { + uint32_t consumed; + uint16_t low16 = 
(uint16_t)it->current_value; + bool has_value = container_iterator_read_into_uint32( + it->container, it->typecode, &it->container_it, it->highbits, buf, + count - ret, &consumed, &low16); + ret += consumed; + buf += consumed; + if (has_value) { + it->has_value = true; + it->current_value = it->highbits | low16; + assert(ret == count); + return ret; + } + it->container_index++; + it->has_value = loadfirstvalue(it); } - it->container_index++; - it->has_value = loadfirstvalue(it); - } - return ret; + return ret; } - - -void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { roaring_free(it); } +void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { + roaring_free(it); +} /**** * end of roaring_uint32_iterator_t diff --git a/tests/toplevel_unit.c b/tests/toplevel_unit.c index 44f4f6de1..af5429dfb 100644 --- a/tests/toplevel_unit.c +++ b/tests/toplevel_unit.c @@ -3968,6 +3968,14 @@ void test_read_uint32_iterator(uint8_t type) { convert_all_containers(r, type); } + roaring_uint32_iterator_t *iter = roaring_create_iterator(r); + uint32_t buffer[1]; + uint32_t got = roaring_read_uint32_iterator(iter, buffer, 0); + assert_true(got == 0); + assert_true(iter->has_value); + assert_true(iter->current_value == 0); + roaring_free_uint32_iterator(iter); + read_compare(r, ref_values, ref_count, 1); read_compare(r, ref_values, ref_count, 2); read_compare(r, ref_values, ref_count, 7); @@ -4111,6 +4119,18 @@ DEFINE_TEST(test_iterator_reuse) { test_iterator_reuse_retry_count(0); } DEFINE_TEST(test_iterator_reuse_many) { test_iterator_reuse_retry_count(10); } +DEFINE_TEST(read_uint32_iterator_zero_count) { + roaring_bitmap_t *r = roaring_bitmap_from_range(0, 10000, 1); + roaring_uint32_iterator_t *iterator = roaring_create_iterator(r); + uint32_t buf[1]; + uint32_t read = roaring_read_uint32_iterator(iterator, buf, 0); + assert_true(read == 0); + assert_true(iterator->has_value); + assert_true(iterator->current_value == 0); + 
roaring_free_uint32_iterator(iterator); + roaring_bitmap_free(r); +} + DEFINE_TEST(test_add_range) { // autoconversion: BITSET -> BITSET -> RUN { @@ -4883,6 +4903,7 @@ int main() { cmocka_unit_test(test_previous_iterator_native), cmocka_unit_test(test_iterator_reuse), cmocka_unit_test(test_iterator_reuse_many), + cmocka_unit_test(read_uint32_iterator_zero_count), cmocka_unit_test(test_add_range), cmocka_unit_test(test_remove_range), cmocka_unit_test(test_remove_many),