Skip to content

Commit

Permalink
fixing issue 660 (#661)
Browse files Browse the repository at this point in the history
* fixing issue 660

* update
  • Loading branch information
lemire authored Sep 19, 2024
1 parent e42ec07 commit 51be4bd
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 11 deletions.
15 changes: 14 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -410,10 +410,17 @@ int main() {
// we can write a bitmap to a pointer and recover it later
uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
char *serializedbytes = malloc(expectedsize);
// When serializing data to a file, we recommend that you also use
// checksums so that, at deserialization, you can be confident
// that you are recovering the correct data.
roaring_bitmap_portable_serialize(r1, serializedbytes);
// Note: it is expected that the input follows the specification
// https://github.com/RoaringBitmap/RoaringFormatSpec
// otherwise the result may be unusable.
// The 'roaring_bitmap_portable_deserialize_safe' function will not read
// beyond expectedsize bytes.
// We recommend you further use checksums to make sure that the input is from
// serialized data.
roaring_bitmap_t *t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
if(t == NULL) { return EXIT_FAILURE; }
const char *reason = NULL;
Expand All @@ -428,7 +435,11 @@ int main() {
roaring_bitmap_portable_deserialize_size(serializedbytes, expectedsize);
assert(sizeofbitmap ==
expectedsize); // sizeofbitmap would be zero if no bitmap were found
// we can also read the bitmap "safely" by specifying a byte size limit:
// We can also read the bitmap "safely" by specifying a byte size limit.
// The 'roaring_bitmap_portable_deserialize_safe' function will not read
// beyond expectedsize bytes.
// We recommend you further use checksums to make sure that the input is from
// serialized data.
t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
if(t == NULL) {
printf("Problem during deserialization.\n");
Expand Down Expand Up @@ -500,6 +511,8 @@ We also support efficient 64-bit compressed bitmaps in C:
roaring64_bitmap_free(r2);
```

The API is similar to the conventional 32-bit bitmaps. Please see
the header file `roaring64.h` (compare with `roaring.h`).

# Conventional bitsets (C)

Expand Down
23 changes: 20 additions & 3 deletions include/roaring/roaring.h
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,10 @@ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r);
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
*
* When serializing data to a file, we recommend that you also use
* checksums so that, at deserialization, you can be confident
* that you are recovering the correct data.
*/
size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);

Expand Down Expand Up @@ -615,7 +619,10 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
* https://github.com/RoaringBitmap/RoaringFormatSpec
*
* The function itself is safe in the sense that it will not cause buffer
* overflows. However, for correct operations, it is assumed that the bitmap
* overflows: it will not read beyond the scope of the provided buffer
* (buf,maxbytes).
*
* However, for correct operations, it is assumed that the bitmap
* read was once serialized from a valid bitmap (i.e., it follows the format
* specification). If you provided an incorrect input (garbage), then the bitmap
* read may not be in a valid state and following operations may not lead to
Expand All @@ -625,8 +632,10 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
* but not for random inputs.
*
* You may use roaring_bitmap_internal_validate to check the validity of the
* bitmap prior to using it. You may also use other strategies to check for
* corrupted inputs (e.g., checksums).
* bitmap prior to using it.
*
* We recommend that you use checksums to check that serialized data corresponds
* to a serialized bitmap.
*
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
Expand Down Expand Up @@ -688,6 +697,10 @@ size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r);
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
*
* When serializing data to a file, we recommend that you also use
* checksums so that, at deserialization, you can be confident
* that you are recovering the correct data.
*/
size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf);

Expand Down Expand Up @@ -722,6 +735,10 @@ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r);
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
*
* When serializing data to a file, we recommend that you also use
* checksums so that, at deserialization, you can be confident
* that you are recovering the correct data.
*/
void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf);

Expand Down
17 changes: 15 additions & 2 deletions include/roaring/roaring64.h
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,10 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r);
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
*
* When serializing data to a file, we recommend that you also use
* checksums so that, at deserialization, you can be confident
* that you are recovering the correct data.
*/
size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r,
char *buf);
Expand All @@ -525,14 +529,17 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf,
size_t maxbytes);

/**
* Read a bitmap from a serialized buffer safely (reading up to maxbytes).
* Read a bitmap from a serialized buffer (reading up to maxbytes).
* In case of failure, NULL is returned.
*
* This is meant to be compatible with other languages
* https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations
*
* The function itself is safe in the sense that it will not cause buffer
* overflows. However, for correct operations, it is assumed that the bitmap
* overflows: it will not read beyond the scope of the provided buffer
* (buf,maxbytes).
*
* However, for correct operations, it is assumed that the bitmap
* read was once serialized from a valid bitmap (i.e., it follows the format
* specification). If you provided an incorrect input (garbage), then the bitmap
* read may not be in a valid state and following operations may not lead to
Expand All @@ -541,6 +548,12 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf,
* order. This is is guaranteed to happen when serializing an existing bitmap,
* but not for random inputs.
*
* You may use roaring64_bitmap_internal_validate to check the validity of the
* bitmap prior to using it.
*
* We recommend that you use checksums to check that serialized data corresponds
* to a serialized bitmap.
*
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
Expand Down
2 changes: 1 addition & 1 deletion src/roaring.c
Original file line number Diff line number Diff line change
Expand Up @@ -3319,7 +3319,7 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {

bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) {
uint32_t max_value = roaring_bitmap_maximum(r);
size_t new_array_size = (size_t)(((uint64_t)max_value + 63) / 64);
size_t new_array_size = (size_t)(max_value / 64 + 1);
bool resize_ok = bitset_resize(bitset, new_array_size, true);
if (!resize_ok) {
return false;
Expand Down
53 changes: 53 additions & 0 deletions tests/roaring64_unit.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include <algorithm>
#include <array>
#include <map>
#include <numeric>
#include <random>
#include <string>
#include <vector>

Expand All @@ -10,6 +12,15 @@

using namespace roaring::api;

static unsigned int seed = 123456789;
static const int OUR_RAND_MAX = (1 << 30) - 1;
inline static unsigned int
our_rand() { // we do not want to depend on a system-specific
// random number generator
seed = (1103515245 * seed + 12345);
return seed & OUR_RAND_MAX;
}

namespace {

void assert_vector_equal(const std::vector<uint64_t>& lhs,
Expand Down Expand Up @@ -37,6 +48,47 @@ void assert_r64_valid(roaring64_bitmap_t* b) {
}
}

bool deserialization_test(const char* data, size_t size) {
// We test that deserialization never fails.
roaring64_bitmap_t* bitmap =
roaring64_bitmap_portable_deserialize_safe(data, size);
if (bitmap) {
// The bitmap may not be usable if it does not follow the specification.
// We can validate the bitmap we recovered to make sure it is proper.
const char* reason_failure = NULL;
if (roaring64_bitmap_internal_validate(bitmap, &reason_failure)) {
// the bitmap is ok!
uint32_t cardinality = roaring64_bitmap_get_cardinality(bitmap);

for (uint32_t i = 100; i < 1000; i++) {
if (!roaring64_bitmap_contains(bitmap, i)) {
cardinality++;
roaring64_bitmap_add(bitmap, i);
}
}

uint32_t new_cardinality = roaring64_bitmap_get_cardinality(bitmap);
if (cardinality != new_cardinality) {
return false;
}
}
roaring64_bitmap_free(bitmap);
}
return true;
}

DEFINE_TEST(fuzz_deserializer) {
for (size_t i = 0; i < 10000; i++) {
size_t vec_size = our_rand() % 10000;
char *buffer = (char *)malloc(vec_size);
for (size_t j = 0; j < vec_size; j++) {
buffer[j] = our_rand() % 256;
}
deserialization_test(buffer, vec_size);
free(buffer);
}
}

DEFINE_TEST(test_copy) {
roaring64_bitmap_t* r1 = roaring64_bitmap_create();
assert_r64_valid(r1);
Expand Down Expand Up @@ -1879,6 +1931,7 @@ DEFINE_TEST(test_stats) {

int main() {
const struct CMUnitTest tests[] = {
cmocka_unit_test(fuzz_deserializer),
cmocka_unit_test(test_copy),
cmocka_unit_test(test_from_range),
cmocka_unit_test(test_move_from_roaring32),
Expand Down
30 changes: 26 additions & 4 deletions tests/toplevel_unit.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,18 @@ bool roaring_iterator_sumall(uint32_t value, void *param) {
*(uint32_t *)param += value;
return true; // continue till the end
}

DEFINE_TEST(issue660) {
roaring_bitmap_t *r1 = roaring_bitmap_create();
roaring_bitmap_add(r1, 0);
bitset_t *b = bitset_create();
bool success =
roaring_bitmap_to_bitset(r1, b); // Segfault happens on this line
assert_true(success);
bitset_free(b);
roaring_bitmap_free(r1);
}

DEFINE_TEST(issue457) {
roaring_bitmap_t *r1 = roaring_bitmap_from_range(65539, 65541, 1);
roaring_bitmap_printf_describe(r1);
Expand Down Expand Up @@ -1807,7 +1819,6 @@ static roaring_bitmap_t *gen_bitmap(double start_density,
double density_gradient, int run_length,
int blank_range_start, int blank_range_end,
int universe_size) {
srand(2345);
roaring_bitmap_t *ans = roaring_bitmap_create();
double d = start_density;

Expand Down Expand Up @@ -3340,7 +3351,6 @@ DEFINE_TEST(test_inplace_negation_run2) {
// TODO it

DEFINE_TEST(test_rand_flips) {
srand(1234);
const int min_runs = 1;
const int flip_trials = 5; // these are expensive tests
const int range = 2000000;
Expand Down Expand Up @@ -3398,7 +3408,6 @@ DEFINE_TEST(test_rand_flips) {

// randomized flipping test - inplace version
DEFINE_TEST(test_inplace_rand_flips) {
srand(1234);
const int min_runs = 1;
const int flip_trials = 5; // these are expensive tests
const int range = 2000000;
Expand Down Expand Up @@ -3486,7 +3495,6 @@ DEFINE_TEST(test_flip_run_container_removal2) {

// randomized test for rank query
DEFINE_TEST(select_test) {
srand(1234);
const int min_runs = 1;
const uint32_t range = 2000000;
char *input = (char *)malloc(range);
Expand Down Expand Up @@ -4702,6 +4710,18 @@ DEFINE_TEST(robust_deserialization) {
assert_true(deserialization_test(test1, sizeof(test1)));
}

DEFINE_TEST(fuzz_deserializer) {
for (size_t i = 0; i < 10000; i++) {
size_t vec_size = our_rand() % 10000;
char *buffer = malloc(vec_size);
for (size_t j = 0; j < vec_size; j++) {
buffer[j] = our_rand() % 256;
}
deserialization_test(buffer, vec_size);
free(buffer);
}
}

DEFINE_TEST(issue538) {
roaring_bitmap_t *dense = roaring_bitmap_create();
int *values = (int *)malloc(4500 * sizeof(int));
Expand Down Expand Up @@ -4785,6 +4805,8 @@ int main() {
tellmeall();

const struct CMUnitTest tests[] = {
cmocka_unit_test(fuzz_deserializer),
cmocka_unit_test(issue660),
cmocka_unit_test(issue538b),
cmocka_unit_test(issue538),
cmocka_unit_test(simple_roaring_bitmap_or_many),
Expand Down

0 comments on commit 51be4bd

Please sign in to comment.