Skip to content

Commit

Permalink
Add a roaring64 iterator and functions for it (#558)
Browse files Browse the repository at this point in the history
* Add a roaring64 iterator and functions for it

These mirror the 32-bit roaring iterator functions.

* Rename roaring64_{verb}_iterator to roaring64_iterator_{verb}

This is more consistent with the rest of the codebase.

* Add roaring64_iterator_{reinit, reinit_last}

These allow reusing an existing iterator.
  • Loading branch information
SLieve authored Jan 19, 2024
1 parent edd12bb commit ab06f50
Show file tree
Hide file tree
Showing 6 changed files with 683 additions and 4 deletions.
12 changes: 12 additions & 0 deletions include/roaring/containers/containers.h
Original file line number Diff line number Diff line change
Expand Up @@ -2574,6 +2574,18 @@ bool container_iterator_read_into_uint32(const container_t *c, uint8_t typecode,
uint32_t count, uint32_t *consumed,
uint16_t *value_out);

/**
* Reads up to `count` entries from the container, and writes them into `buf`
* as `high48 | entry`. Returns true and sets `value_out` if a value is present
* after reading the entries. Sets `consumed` to the number of values read.
* `count` should be greater than zero; if it is zero, nothing is read,
* `consumed` is set to zero and false is returned.
*
* For run containers, `*value_out` is also read on entry: it is used as the
* first value to emit from the run the iterator currently points into.
*/
bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode,
roaring_container_iterator_t *it,
uint64_t high48, uint64_t *buf,
uint64_t count, uint32_t *consumed,
uint16_t *value_out);

#ifdef __cplusplus
} } } // extern "C" { namespace roaring { namespace internal {
#endif
Expand Down
101 changes: 101 additions & 0 deletions include/roaring/roaring64.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ namespace api {

typedef struct roaring64_bitmap_s roaring64_bitmap_t;
typedef struct roaring64_leaf_s roaring64_leaf_t;
typedef struct roaring64_iterator_s roaring64_iterator_t;

/**
* A bit of context usable with `roaring64_bitmap_*_bulk()` functions.
Expand Down Expand Up @@ -380,6 +381,106 @@ void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1,
bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r,
roaring_iterator64 iterator, void *ptr);

/**
* Create an iterator object that can be used to iterate through the values.
* Caller is responsible for calling `roaring64_iterator_free()`.
*
* The iterator is initialized. If there is a value, then this iterator points
* to the first value and `roaring64_iterator_has_value()` returns true. The
* value can be retrieved with `roaring64_iterator_value()`.
*/
roaring64_iterator_t *roaring64_iterator_create(const roaring64_bitmap_t *r);

/**
* Create an iterator object that can be used to iterate through the values.
* Caller is responsible for calling `roaring64_iterator_free()`.
*
* The iterator is initialized. If there is a value, then this iterator points
* to the last value and `roaring64_iterator_has_value()` returns true. The
* value can be retrieved with `roaring64_iterator_value()`.
*/
roaring64_iterator_t *roaring64_iterator_create_last(
const roaring64_bitmap_t *r);

/**
* Re-initializes an existing iterator. Functionally the same as
* `roaring64_iterator_create` without an allocation.
*/
void roaring64_iterator_reinit(const roaring64_bitmap_t *r,
roaring64_iterator_t *it);

/**
* Re-initializes an existing iterator. Functionally the same as
* `roaring64_iterator_create_last` without an allocation.
*/
void roaring64_iterator_reinit_last(const roaring64_bitmap_t *r,
roaring64_iterator_t *it);

/**
* Creates a copy of the iterator. Caller is responsible for calling
* `roaring64_iterator_free()` on the resulting iterator.
*/
roaring64_iterator_t *roaring64_iterator_copy(const roaring64_iterator_t *it);

/**
* Free the iterator.
*/
void roaring64_iterator_free(roaring64_iterator_t *it);

/**
* Returns true if the iterator currently points to a value. If so, calling
* `roaring64_iterator_value()` returns the value.
*/
bool roaring64_iterator_has_value(const roaring64_iterator_t *it);

/**
* Returns the value the iterator currently points to. Should only be called if
* `roaring64_iterator_has_value()` returns true.
*/
uint64_t roaring64_iterator_value(const roaring64_iterator_t *it);

/**
* Advance the iterator. If there is a new value, then
* `roaring64_iterator_has_value()` returns true. Values are traversed in
* increasing order. For convenience, returns the result of
* `roaring64_iterator_has_value()`.
*
* Once this returns false, `roaring64_iterator_advance` should not be called on
* the iterator again. Calling `roaring64_iterator_previous` is allowed.
*/
bool roaring64_iterator_advance(roaring64_iterator_t *it);

/**
* Decrement the iterator. If there is a new value, then
* `roaring64_iterator_has_value()` returns true. Values are traversed in
* decreasing order. For convenience, returns the result of
* `roaring64_iterator_has_value()`.
*
* Once this returns false, `roaring64_iterator_previous` should not be called
* on the iterator again. Calling `roaring64_iterator_advance` is allowed.
*/
bool roaring64_iterator_previous(roaring64_iterator_t *it);

/**
* Move the iterator to the first value greater than or equal to `val`, if it
* exists at or after the current position of the iterator. If there is a new
* value, then `roaring64_iterator_has_value()` returns true. Values are
* traversed in increasing order. For convenience, returns the result of
* `roaring64_iterator_has_value()`.
*/
bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it,
uint64_t val);

/**
* Reads up to `count` values from the iterator into the given `buf`. Returns
* the number of elements read. The number of elements read can be smaller than
* `count`, which means that there are no more elements in the bitmap.
*
* This function can be used together with other iterator functions.
*/
uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf,
uint64_t count);

#ifdef __cplusplus
} // extern "C"
} // namespace roaring
Expand Down
18 changes: 18 additions & 0 deletions microbenchmarks/bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,24 @@ struct iterate_all {
auto IterateAll = BasicBench<iterate_all>;
BENCHMARK(IterateAll);

struct iterate_all64 {
static uint64_t run() {
uint64_t marker = 0;
for (size_t i = 0; i < count; ++i) {
roaring64_bitmap_t *r = bitmaps64[i];
roaring64_iterator_t *it = roaring64_iterator_create(r);
while (roaring64_iterator_has_value(it)) {
marker++;
roaring64_iterator_advance(it);
}
roaring64_iterator_free(it);
}
return marker;
}
};
auto IterateAll64 = BasicBench<iterate_all64>;
BENCHMARK(IterateAll64);

struct compute_cardinality {
static uint64_t run() {
uint64_t marker = 0;
Expand Down
89 changes: 89 additions & 0 deletions src/containers/containers.c
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,95 @@ bool container_iterator_read_into_uint32(const container_t *c, uint8_t typecode,
}
}

/**
 * Reads up to `count` values from container `c`, starting at the position
 * described by `it`, and writes each of them to `buf` as `high48 | value`.
 *
 * - `*consumed` is set to the number of values written to `buf`.
 * - Returns true, with `*value_out` set to the next unread value, if the
 *   container still holds a value after the ones that were read; returns
 *   false otherwise.
 * - `count` should be greater than zero. If it is zero, nothing is read,
 *   `*consumed` is zero and false is returned.
 * - For run containers `*value_out` is an in/out parameter: on entry it must
 *   hold the value the iterator currently points to.
 *
 * `count` is 64 bits wide. Every comparison against it is performed in
 * 64 bits and only the (always smaller) result is narrowed, so a `count`
 * whose low 32 bits are zero is not mistaken for "read nothing".
 */
bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode,
                                         roaring_container_iterator_t *it,
                                         uint64_t high48, uint64_t *buf,
                                         uint64_t count, uint32_t *consumed,
                                         uint16_t *value_out) {
    *consumed = 0;
    if (count == 0) {
        return false;
    }
    switch (typecode) {
        case BITSET_CONTAINER_TYPE: {
            const bitset_container_t *bc = const_CAST_bitset(c);
            // `it->index` is a bit position; mask off the bits below it in
            // the first word so that already-visited values are not re-read.
            uint32_t wordindex = it->index / 64;
            uint64_t word =
                bc->words[wordindex] & (UINT64_MAX << (it->index % 64));
            do {
                // Read set bits.
                while (word != 0 && *consumed < count) {
                    *buf = high48 |
                           (wordindex * 64 + roaring_trailing_zeroes(word));
                    word = word & (word - 1);  // Clear the lowest set bit.
                    buf++;
                    (*consumed)++;
                }
                // Skip unset bits.
                while (word == 0 &&
                       wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS) {
                    wordindex++;
                    word = bc->words[wordindex];
                }
            } while (word != 0 && *consumed < count);

            if (word != 0) {
                // A value remains: park the iterator on it.
                it->index = wordindex * 64 + roaring_trailing_zeroes(word);
                *value_out = it->index;
                return true;
            }
            return false;
        }
        case ARRAY_CONTAINER_TYPE: {
            const array_container_t *ac = const_CAST_array(c);
            // `it->index` is a position in `ac->array`.
            const uint32_t available =
                (uint32_t)(ac->cardinality - it->index);
            // Compare in 64 bits; narrowing `count` first would truncate it.
            const uint32_t num_values =
                (count < available) ? (uint32_t)count : available;
            for (uint32_t i = 0; i < num_values; i++) {
                buf[i] = high48 | ac->array[it->index + i];
            }
            *consumed += num_values;
            it->index += num_values;
            if (it->index < ac->cardinality) {
                *value_out = ac->array[it->index];
                return true;
            }
            return false;
        }
        case RUN_CONTAINER_TYPE: {
            const run_container_t *rc = const_CAST_run(c);
            // `it->index` is the index of the current run and `*value_out`
            // is the current value inside that run.
            do {
                uint32_t largest_run_value =
                    rc->runs[it->index].value + rc->runs[it->index].length;
                const uint32_t left_in_run =
                    largest_run_value - *value_out + 1;
                // Non-zero because the loop only runs while
                // `*consumed < count`; compared in 64 bits to avoid
                // truncating the remaining budget.
                const uint64_t still_wanted = count - *consumed;
                const uint32_t num_values = (still_wanted < left_in_run)
                                                ? (uint32_t)still_wanted
                                                : left_in_run;
                for (uint32_t i = 0; i < num_values; i++) {
                    buf[i] = high48 | (*value_out + i);
                }
                *value_out += num_values;
                buf += num_values;
                *consumed += num_values;

                // We check for `*value_out == 0` because
                // `*value_out += num_values` can overflow when the run ends
                // at UINT16_MAX and the whole run was consumed. In this case
                // `*value_out` wraps around to 0.
                if (*value_out > largest_run_value || *value_out == 0) {
                    it->index++;
                    if (it->index < rc->n_runs) {
                        *value_out = rc->runs[it->index].value;
                    } else {
                        return false;
                    }
                }
            } while (*consumed < count);
            return true;
        }
        default:
            assert(false);
            roaring_unreachable;
            return false;
    }
}

#ifdef __cplusplus
} } } // extern "C" { namespace roaring { namespace internal {
#endif
Loading

0 comments on commit ab06f50

Please sign in to comment.