Skip to content

Commit

Permalink
add roaring_bitmap_rank_many(): get rank() values in Bulk (#527)
Browse files Browse the repository at this point in the history
* roaring_bitmap_rank_many(): get `rank()` values in Bulk

* add C++ Wrapper of `roaring_bitmap_rank_many()`

* explicit cast uint32 to uint16; add more precise document on `rank_many(begin,end,ans)`
  • Loading branch information
longqimin authored Dec 1, 2023
1 parent 098cdaa commit 370199f
Show file tree
Hide file tree
Showing 12 changed files with 227 additions and 0 deletions.
8 changes: 8 additions & 0 deletions cpp/roaring.hh
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,14 @@ public:
return api::roaring_bitmap_rank(&roaring, x);
}

/**
 * Bulk variant of `rank()`: stores the rank of each value of `[begin .. end)`
 * into `ans`, in the same order. The values in `[begin .. end)` must be in
 * ascending order, and `ans` must have room for `end - begin` results.
 *
 * A possible (non-bulk) implementation:
 *     for (auto* it = begin; it != end; ++it) *(ans++) = rank(*it);
 */
void rank_many(const uint32_t* begin, const uint32_t* end, uint64_t* ans) const noexcept {
    api::roaring_bitmap_rank_many(&roaring, begin, end, ans);
}

/**
* Returns the index of x in the set, index start from 0.
* If the set doesn't contain x , this function will return -1.
Expand Down
23 changes: 23 additions & 0 deletions include/roaring/containers/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,29 @@ inline int array_container_rank(const array_container_t *arr, uint16_t x) {
}
}

/**
 * Bulk version of array_container_rank().
 *
 * Starting at `begin`, handles the consecutive query values whose upper
 * 16 bits equal those of `*begin` (i.e. the values that fall into this
 * container) and writes `start_rank + (rank inside this container)` for each
 * of them to `ans`.
 *
 * The query values must be sorted in ascending order, and `begin != end` is
 * required because `*begin` is read unconditionally.
 *
 * Returns the number of consumed elements.
 */
inline uint32_t array_container_rank_many(const array_container_t *arr, uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans){
    const uint16_t high = (uint16_t)((*begin) >> 16);
    // Absolute offset into arr->array. Because the queries are ascending,
    // every element before `pos` is <= all remaining queries, so only the
    // tail [pos, cardinality) has to be searched. `pos` is also exactly the
    // in-container rank of the most recently answered query.
    uint32_t pos = 0;
    const uint32_t* iter = begin;
    for(; iter != end; iter++) {
        uint32_t x = *iter;
        uint16_t xhigh = (uint16_t)(x >> 16);
        if(xhigh != high) return (uint32_t)(iter - begin);// stop at next container

        // The result is relative to the window that starts at `pos`:
        // idx >= 0 is the position of x, otherwise -idx - 1 is the insertion
        // point, i.e. the number of window elements smaller than x.
        const int32_t idx = binarySearch(arr->array+pos, arr->cardinality-pos, (uint16_t)x);
        const bool is_present = idx >= 0;
        if (is_present) {
            // Accumulate instead of overwriting, so the window never moves
            // backwards.
            pos += (uint32_t)idx + 1;
        } else {
            pos += (uint32_t)(-idx - 1);
        }
        *(ans++) = start_rank + pos;
    }
    return (uint32_t)(iter - begin);
}


/* Returns the index of x; if x does not exist, returns -1 */
inline int array_container_get_index(const array_container_t *arr, uint16_t x) {
const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
Expand Down
3 changes: 3 additions & 0 deletions include/roaring/containers/bitset.h
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,9 @@ uint16_t bitset_container_maximum(const bitset_container_t *container);
/* Returns the number of values equal or smaller than x */
int bitset_container_rank(const bitset_container_t *container, uint16_t x);

// Bulk version of bitset_container_rank(). Handles the leading values of
// [begin, end) whose upper 16 bits match those of *begin and writes
// start_rank + (rank inside this container) for each of them to ans.
// The values must be in ascending order and begin != end is required
// (*begin is read unconditionally). Returns the number of consumed elements.
uint32_t bitset_container_rank_many(const bitset_container_t *container, uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans);

/* Returns the index of x; if x does not exist, returns -1 */
int bitset_container_get_index(const bitset_container_t *container, uint16_t x);

Expand Down
22 changes: 22 additions & 0 deletions include/roaring/containers/containers.h
Original file line number Diff line number Diff line change
Expand Up @@ -2331,6 +2331,28 @@ static inline int container_rank(
return false;
}

// Bulk counterpart of container_rank(): unwraps a shared container if needed,
// then forwards to the rank_many routine matching the container type.
// Returns the number of consumed elements.
static inline uint32_t container_rank_many(
    const container_t *c, uint8_t type,
    uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans
){
    c = container_unwrap_shared(c, &type);

    if (type == BITSET_CONTAINER_TYPE) {
        return bitset_container_rank_many(const_CAST_bitset(c), start_rank, begin, end, ans);
    }
    if (type == ARRAY_CONTAINER_TYPE) {
        return array_container_rank_many(const_CAST_array(c), start_rank, begin, end, ans);
    }
    if (type == RUN_CONTAINER_TYPE) {
        return run_container_rank_many(const_CAST_run(c), start_rank, begin, end, ans);
    }

    // No other type code is expected here.
    assert(false);
    roaring_unreachable;
    return 0;
}

// Returns the index of x; if x does not exist, returns -1
static inline int container_get_index(const container_t *c, uint8_t type,
uint16_t x) {
Expand Down
3 changes: 3 additions & 0 deletions include/roaring/containers/run.h
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,9 @@ inline uint16_t run_container_maximum(const run_container_t *run) {
/* Returns the number of values equal or smaller than x */
int run_container_rank(const run_container_t *arr, uint16_t x);

// Bulk version of run_container_rank(). Handles the leading values of
// [begin, end) whose upper 16 bits match those of *begin and writes
// start_rank + (rank inside this container) for each of them to ans.
// The values must be in ascending order and begin != end is required
// (*begin is read unconditionally). Returns the number of consumed elements.
uint32_t run_container_rank_many(const run_container_t *arr, uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans);

/* Returns the index of x; if x does not exist, returns -1 */
int run_container_get_index(const run_container_t *arr, uint16_t x);

Expand Down
11 changes: 11 additions & 0 deletions include/roaring/roaring.h
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,17 @@ bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank,
*/
uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x);

/**
 * roaring_bitmap_rank_many is a bulk version of `roaring_bitmap_rank`:
 * it stores the rank of each element of `[begin .. end)` into `ans[]`,
 * in the same order as the input.
 *
 * The values in `[begin .. end)` must be sorted in ascending order.
 * The caller is responsible for ensuring that enough memory is allocated, e.g.
 *
 *     ans = malloc((end-begin) * sizeof(uint64_t));
 */
void roaring_bitmap_rank_many(const roaring_bitmap_t *r, const uint32_t* begin, const uint32_t* end, uint64_t* ans);

/**
* Returns the index of x in the given roaring bitmap.
* If the roaring bitmap doesn't contain x , this function will return -1.
Expand Down
30 changes: 30 additions & 0 deletions microbenchmarks/bench.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "bench.h"
#include <vector>


struct successive_intersection {
Expand Down Expand Up @@ -154,6 +155,35 @@ struct compute_cardinality {
auto ComputeCardinality = BasicBench<compute_cardinality>;
BENCHMARK(ComputeCardinality);

struct rank_many_slow {
static uint64_t run() {
std::vector<uint64_t> ranks(5);
for (size_t i = 0; i < count; ++i) {
ranks[0] = roaring_bitmap_rank(bitmaps[i], maxvalue/5);
ranks[1] = roaring_bitmap_rank(bitmaps[i], 2*maxvalue/5);
ranks[2] = roaring_bitmap_rank(bitmaps[i], 3*maxvalue/5);
ranks[3] = roaring_bitmap_rank(bitmaps[i], 4*maxvalue/5);
ranks[4] = roaring_bitmap_rank(bitmaps[i], maxvalue);
}
return ranks[0];
}
};
auto RankManySlow = BasicBench<rank_many_slow>;
BENCHMARK(RankManySlow);

// Same five queries as rank_many_slow, answered with a single
// roaring_bitmap_rank_many() call per bitmap.
struct rank_many {
    static uint64_t run() {
        std::vector<uint32_t> queries{maxvalue/5, 2*maxvalue/5, 3*maxvalue/5, 4*maxvalue/5, maxvalue};
        std::vector<uint64_t> results(5);
        const uint32_t *first = queries.data();
        const uint32_t *last = first + queries.size();
        for (size_t idx = 0; idx < count; ++idx) {
            roaring_bitmap_rank_many(bitmaps[idx], first, last, results.data());
        }
        return results.front();
    }
};
auto RankMany = BasicBench<rank_many>;
BENCHMARK(RankMany);

int main(int argc, char **argv) {
const char *dir_name;
if ((argc == 1) || (argc > 1 && argv[1][0] == '-')) {
Expand Down
23 changes: 23 additions & 0 deletions src/containers/bitset.c
Original file line number Diff line number Diff line change
Expand Up @@ -1232,6 +1232,29 @@ int bitset_container_rank(const bitset_container_t *container, uint16_t x) {
return sum;
}

// Bulk version of bitset_container_rank(). Handles the leading values of
// [begin, end) whose upper 16 bits match those of *begin, writing
// start_rank + (rank inside this container) for each of them to ans.
// Expects ascending input; returns the number of consumed elements.
uint32_t bitset_container_rank_many(const bitset_container_t *container, uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans){
    const uint16_t container_key = (uint16_t)((*begin) >> 16);
    int word_index = 0;     // first word not yet folded into `counted`
    int counted = 0;        // population count of words [0, word_index)
    const uint32_t* cursor = begin;

    while (cursor != end) {
        const uint32_t value = *cursor;
        if ((uint16_t)(value >> 16) != container_key) {
            break;  // value belongs to a later container
        }

        const uint16_t low = (uint16_t)value;
        const int target_word = low / 64;
        // Fold every complete word that lies before the one holding `low`.
        while (word_index < target_word) {
            counted += roaring_hamming(container->words[word_index]);
            word_index++;
        }

        // Mask keeping bit (low % 64) and every bit below it.
        const uint64_t bit = UINT64_C(1) << (low % 64);
        const uint64_t mask = bit + bit - 1;
        const uint64_t partial_word = container->words[word_index] & mask;
        *ans = start_rank + counted + roaring_hamming(partial_word);
        ans++;
        cursor++;
    }
    return cursor - begin;
}


/* Returns the index of x; if x does not exist, returns -1 */
int bitset_container_get_index(const bitset_container_t *container, uint16_t x) {
if (bitset_container_get(container, x)) {
Expand Down
33 changes: 33 additions & 0 deletions src/containers/run.c
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,39 @@ int run_container_rank(const run_container_t *container, uint16_t x) {
}
return sum;
}
// Bulk version of run_container_rank(). Handles the leading values of
// [begin, end) whose upper 16 bits match those of *begin, writing
// start_rank + (rank inside this container) for each of them to ans.
// Expects ascending input; returns the number of consumed elements.
uint32_t run_container_rank_many(const run_container_t *container, uint64_t start_rank, const uint32_t* begin, const uint32_t* end, uint64_t* ans){
    const uint16_t container_key = (uint16_t)((*begin) >> 16);
    const uint32_t* cursor = begin;
    int counted = 0;    // number of values held by runs [0, run_index)
    int run_index = 0;

    for (; cursor != end; cursor++) {
        const uint32_t value = *cursor;
        if ((uint16_t)(value >> 16) != container_key) {
            break;  // value belongs to a later container
        }
        const uint32_t low = value & 0xFFFF;

        // Skip, and count, every run that ends before `low`.
        while (run_index < container->n_runs) {
            const uint32_t run_start = container->runs[run_index].value;
            const uint32_t run_length = container->runs[run_index].length;
            if (low <= run_start + run_length) {
                break;
            }
            counted += run_length + 1;
            run_index++;
        }

        uint64_t rank = start_rank + counted;
        if (run_index < container->n_runs) {
            // The current run ends at or after `low`; it contributes only
            // when `low` is not located before its first value.
            const uint32_t run_start = container->runs[run_index].value;
            if (low >= run_start) {
                rank += (low - run_start) + 1;
            }
        }
        *(ans++) = rank;
    }

    return cursor - begin;
}


int run_container_get_index(const run_container_t *container, uint16_t x) {
if (run_container_contains(container, x)) {
Expand Down
26 changes: 26 additions & 0 deletions src/roaring.c
Original file line number Diff line number Diff line change
Expand Up @@ -2796,6 +2796,32 @@ uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) {
}
return size;
}
/**
 * Bulk version of roaring_bitmap_rank(): writes the rank of every value of
 * `[begin .. end)` to `ans`, in input order.
 *
 * The values must be sorted in ascending order and `ans` must have room for
 * `end - begin` results.
 */
void roaring_bitmap_rank_many(const roaring_bitmap_t *bm, const uint32_t* begin, const uint32_t* end, uint64_t* ans) {
    // Total cardinality of the containers with index < i.
    uint64_t size = 0;

    int i = 0;
    const uint32_t* iter = begin;
    while(i < bm->high_low_container.size && iter != end) {
        uint32_t x = *iter;
        uint32_t xhigh = x >> 16;
        uint32_t key = bm->high_low_container.keys[i];
        if (xhigh > key) {
            // The whole container lies before x: count it and move on.
            size +=
                container_get_cardinality(bm->high_low_container.containers[i],
                                          bm->high_low_container.typecodes[i]);
            i++;
        } else if (xhigh == key) {
            // Let the container answer every query that falls into it.
            uint32_t consumed = container_rank_many(bm->high_low_container.containers[i],
                                                    bm->high_low_container.typecodes[i],
                                                    size, iter, end, ans);
            iter += consumed;
            ans += consumed;
        } else {
            // x falls in a gap before container i.
            *(ans++) = size;
            iter++;
        }
    }

    // Every container has been counted at this point, so each remaining query
    // is larger than all stored values and its rank is the total cardinality.
    // Without this loop those entries of `ans` would be left unwritten.
    for (; iter != end; iter++) {
        *(ans++) = size;
    }
}

/**
* roaring_bitmap_get_index returns the index of x; if x does not exist, it returns -1.
Expand Down
12 changes: 12 additions & 0 deletions tests/cpp_unit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -978,6 +978,17 @@ DEFINE_TEST(test_cpp_add_many) {
assert_true(r1 == r2);
}

// Checks Roaring::rank_many() against hand-computed ranks. The query list is
// also used to populate the bitmap and contains the value 9999 twice.
DEFINE_TEST(test_cpp_rank_many) {
    std::vector<uint32_t> queries = {123, 9999, 9999, 0xFFFFFFF7, 0xFFFFFFFF};
    const size_t query_count = queries.size();

    Roaring bitmap;
    bitmap.addMany(query_count, queries.data());

    std::vector<uint64_t> computed(query_count);
    const uint32_t *first = queries.data();
    bitmap.rank_many(first, first + query_count, computed.data());

    std::vector<uint64_t> expected{1,2,2,3,4};
    assert_true(computed == expected);
}

DEFINE_TEST(test_cpp_add_many_64) {
{
// 32-bit integers
Expand Down Expand Up @@ -2002,6 +2013,7 @@ int main() {
cmocka_unit_test(test_cpp_add_range_closed_combinatoric_64),
cmocka_unit_test(test_cpp_add_bulk),
cmocka_unit_test(test_cpp_contains_bulk),
cmocka_unit_test(test_cpp_rank_many),
cmocka_unit_test(test_cpp_remove_range_closed_64),
cmocka_unit_test(test_cpp_remove_range_64),
cmocka_unit_test(test_run_compression_cpp_64_true),
Expand Down
33 changes: 33 additions & 0 deletions tests/toplevel_unit.c
Original file line number Diff line number Diff line change
Expand Up @@ -3606,6 +3606,17 @@ DEFINE_TEST(test_rank) {
if (truerank != computedrank)
printf("%d != %d \n", (int)truerank, (int)computedrank);
assert_true(truerank == computedrank);

uint32_t input[] = {z, z+1, z+10, z+100, z+1000};
uint64_t output[5];
roaring_bitmap_rank_many(r, input, input+5, output);
for(uint32_t i = 0; i < 5; i++) {
truerank = rank(ans, card, input[i]);
computedrank = output[i];
if (truerank != computedrank)
printf("%d != %d \n", (int)truerank, (int)computedrank);
assert_true(truerank == computedrank);
}
}
free(ans);
// now bitmap
Expand All @@ -3622,6 +3633,17 @@ DEFINE_TEST(test_rank) {
if (truerank != computedrank)
printf("%d != %d \n", (int)truerank, (int)computedrank);
assert_true(truerank == computedrank);

uint32_t input[] = {z, z+1, z+10, z+100, z+1000};
uint64_t output[5];
roaring_bitmap_rank_many(r, input, input+5, output);
for(uint32_t i = 0; i < 5; i++) {
truerank = rank(ans, card, input[i]);
computedrank = output[i];
if (truerank != computedrank)
printf("%d != %d \n", (int)truerank, (int)computedrank);
assert_true(truerank == computedrank);
}
}
free(ans);
// now run
Expand All @@ -3639,6 +3661,17 @@ DEFINE_TEST(test_rank) {
if (truerank != computedrank)
printf("%d != %d \n", (int)truerank, (int)computedrank);
assert_true(truerank == computedrank);

uint32_t input[] = {z, z+1, z+10, z+100, z+1000};
uint64_t output[5];
roaring_bitmap_rank_many(r, input, input+5, output);
for(uint32_t i = 0; i < 5; i++) {
truerank = rank(ans, card, input[i]);
computedrank = output[i];
if (truerank != computedrank)
printf("%d != %d \n", (int)truerank, (int)computedrank);
assert_true(truerank == computedrank);
}
}
free(ans);

Expand Down

0 comments on commit 370199f

Please sign in to comment.