From 872a8bb2efeeb9c94a988c852bfbed2fb7d961b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20Sch=C3=B6nrock?= Date: Sat, 14 Dec 2024 17:20:30 +0000 Subject: [PATCH] removing unnecessary boiler plate in unit.c (#65) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * first cut at reducing warnings binaryfusefilter.h only issues addressed 1. changed some return value and parameter types of (static) functions -- PLEASE CHECK THIS IN REVIEW 2. sprinkled 'U' into bitwise operations to silence warnings 3. casting to avoid "standard integer promotion rules" which resulted in signedness warnings 4. explicitly reducing results to the target type rather than letting it happen implicitly tests still passing * first cut at reducing warnings binaryfusefilter.h only issues addressed 1. changed some return value and parameter types of (static) functions -- PLEASE CHECK THIS IN REVIEW 2. sprinkled 'U' into bitwise operations to silence warnings 3. casting to avoid "standard integer promotion rules" which resulted in signedness warnings 4. explicitly reducing results to the target type rather than letting it happen implicitly 5. when and `if` statements ends in break or return, then a following `else if` can be just a new `if` tests still passing * starting work on xofilter.h * binclude/binaryfusefilter.h apparently clean for first time * formatting * first cut on xofilter.h mostly casting size_t down to uint32_t - maybe some internal struct types should have been size_t? also some integer promotion casts * round2 on xorfilter.h mostly casting blocklengt to uint32_t to fit into keyindex.index should keyindex.index be a size_t? * bench.c and unit.c very repetitive casting of mainly sizes and doubles. * all silent now on a clean compile with -Wconversion and -Wsign-conversion so putting these in the Makefile, so during "private" development with the Makefile new warnings will be noticed straight away but not in CMakeLists.txt, because as this is a header-only INTERFACE library, it would force these warning levels on the users. * another sweep from including c++ project turned up these additional 'U' tweaks * mistaken cast which broke test * factoring out the report functionality all sections were indentical except for the call to *contain() and *size_in_bytes some void* and function pointer juggling allowed to make this generic report code reduced by 2/3rds * iron out slight inconsistencies between tests * abstracting away the rest of the test logic for all but the special "failure rate test" the large function dispatch table is a litle annoying, but can be removed as well...TBC tests all pass * fixing a memory leak caught by sanitizer just a missing free() * _duplicates test cases can be convered by the same code remove initialization. it's not needed and compiler now happy * removing the need for large array of boiler plate function wrappers instead of having a wrapper function per action per filter type, we can cast the functions as a generic function pointer on the into the generic test runner, and then cast them as a compatible function pointer type inside the test runner. The generic `filter*` parameter cannot be `void*` and must be a dummy struct because: § 6.2.5..28: "All pointers to structure types shall have the same representation and alignment requirements as each other" (the same is not true for `void*` which may have different representation. This simple change results in a large code reduction and removes the unsightly and hard to remove array of boiler plate function wrappers. --- tests/unit.c | 279 +++++++++++++++++++-------------------------------- 1 file changed, 104 insertions(+), 175 deletions(-) diff --git a/tests/unit.c b/tests/unit.c index 3a30b96..7a5673e 100644 --- a/tests/unit.c +++ b/tests/unit.c @@ -2,71 +2,33 @@ #include "xorfilter.h" #include -// generic function dispatch - -bool gen_xor8_allocate(uint32_t size, void *filter) { return xor8_allocate(size, filter); } -bool gen_xor16_allocate(uint32_t size, void *filter) { return xor16_allocate(size, filter); } -bool gen_binary_fuse8_allocate(uint32_t size, void *filter) { return binary_fuse8_allocate(size, filter); } -bool gen_binary_fuse16_allocate(uint32_t size, void *filter) { return binary_fuse16_allocate(size, filter); } - -void gen_xor8_free(void *filter) { xor8_free(filter); } -void gen_xor16_free(void *filter) { xor16_free(filter); } -void gen_binary_fuse8_free(void *filter) { binary_fuse8_free(filter); } -void gen_binary_fuse16_free(void *filter) { binary_fuse16_free(filter); } - -size_t gen_xor8_size_in_bytes(const void *filter) { return xor8_size_in_bytes(filter); } -size_t gen_xor16_size_in_bytes(const void *filter) { return xor16_size_in_bytes(filter); } -size_t gen_binary_fuse8_size_in_bytes(const void *filter) { return binary_fuse8_size_in_bytes(filter); } -size_t gen_binary_fuse16_size_in_bytes(const void *filter) { return binary_fuse16_size_in_bytes(filter); } - -size_t gen_xor8_serialization_bytes(void *filter) { return xor8_serialization_bytes(filter); } -size_t gen_xor16_serialization_bytes(void *filter) { return xor16_serialization_bytes(filter); } -size_t gen_binary_fuse8_serialization_bytes(void *filter) { return binary_fuse8_serialization_bytes(filter); } -size_t gen_binary_fuse16_serialization_bytes(void *filter) { return binary_fuse16_serialization_bytes(filter); } - -void gen_xor8_serialize(void *filter, char *buffer) { xor8_serialize(filter, buffer); } -void gen_xor16_serialize(void *filter, char *buffer) { xor16_serialize(filter, buffer); } -void gen_binary_fuse8_serialize(void *filter, char *buffer) { binary_fuse8_serialize(filter, buffer); } -void gen_binary_fuse16_serialize(void *filter, char *buffer) { binary_fuse16_serialize(filter, buffer); } - -bool gen_xor8_deserialize(void *filter, const char *buffer) { return xor8_deserialize(filter, buffer); } -bool gen_xor16_deserialize(void *filter, const char *buffer) { return xor16_deserialize(filter, buffer); } -bool gen_binary_fuse8_deserialize(void *filter, const char *buffer) { return binary_fuse8_deserialize(filter, buffer); } -bool gen_binary_fuse16_deserialize(void *filter, const char *buffer) { return binary_fuse16_deserialize(filter, buffer); } - -bool gen_xor8_populate(uint64_t *keys, uint32_t size, void *filter) { return xor8_populate(keys, size, filter); } -bool gen_xor8_buffered_populate(uint64_t *keys, uint32_t size, void *filter) { return xor8_buffered_populate(keys, size, filter); } -bool gen_xor16_populate(uint64_t *keys, uint32_t size, void *filter) { return xor16_populate(keys, size, filter); } -bool gen_xor16_buffered_populate(uint64_t *keys, uint32_t size, void *filter) { return xor16_buffered_populate(keys, size, filter); } -bool gen_binary_fuse8_populate(uint64_t *keys, uint32_t size, void *filter) { return binary_fuse8_populate(keys, size, filter); } -bool gen_binary_fuse16_populate(uint64_t *keys, uint32_t size, void *filter) { return binary_fuse16_populate(keys, size, filter); } - -bool gen_xor8_contain(uint64_t key, const void *filter) { return xor8_contain(key, filter); } -bool gen_xor16_contain(uint64_t key, const void *filter) { return xor16_contain(key, filter); } -bool gen_binary_fuse8_contain(uint64_t key, const void *filter) { return binary_fuse8_contain(key, filter); } -bool gen_binary_fuse16_contain(uint64_t key, const void *filter) { return binary_fuse16_contain(key, filter); } - -typedef bool (*allocate_fpt)(uint32_t size, void *filter); -typedef void (*free_fpt)(void *filter); -typedef size_t (*size_in_bytes_fpt)(const void *filter); -typedef size_t (*serialization_bytes_fpt)(void *filter); -typedef void (*serialize_fpt)(void *filter, char *buffer); -typedef bool (*deserialize_fpt)(void *filter, const char *buffer); -typedef bool (*populate_fpt)(uint64_t *keys, uint32_t size, void *filter); -typedef bool (*contain_fpt)(uint64_t key, const void *filter); +// generic proxy for filter, important that this is a struct, not void +// as § 6.2.5..28: "All pointers to structure types shall have the +// same representation and alignment requirements as each other" +typedef struct { int dummy_; } gen_filter; + +typedef bool (*allocate_fpt)(uint32_t size, gen_filter *filter); +typedef void (*free_fpt)(gen_filter *filter); +typedef size_t (*size_in_bytes_fpt)(const gen_filter *filter); +typedef size_t (*serialization_bytes_fpt)(gen_filter *filter); +typedef void (*serialize_fpt)(gen_filter *filter, char *buffer); +typedef bool (*deserialize_fpt)(gen_filter *filter, const char *buffer); +typedef bool (*populate_fpt)(uint64_t *keys, uint32_t size, gen_filter *filter); +typedef bool (*contain_fpt)(uint64_t key, const gen_filter *filter); + +typedef void (*gfp)(void); // generic function pointer // generic test runner - bool test(size_t size, size_t repeated_size, void *filter, - allocate_fpt allocate, - free_fpt free_filter, - size_in_bytes_fpt size_in_bytes, - serialization_bytes_fpt serialization_bytes, - serialize_fpt serialize, - deserialize_fpt deserialize, - populate_fpt populate, - contain_fpt contain) { - allocate((uint32_t)size, filter); + gfp allocate, + gfp free_filter, + gfp size_in_bytes, + gfp serialization_bytes, + gfp serialize, + gfp deserialize, + gfp populate, + gfp contain) { + ((allocate_fpt)allocate)((uint32_t)size, filter); // we need some set of values uint64_t *big_set = (uint64_t *)malloc(sizeof(uint64_t) * size); for (size_t i = 0; i < size - repeated_size; i++) { @@ -76,22 +38,22 @@ bool test(size_t size, size_t repeated_size, void *filter, big_set[size - i - 1] = i; // we use contiguous values } // we construct the filter - if(!populate(big_set, (uint32_t)size, filter)) { return false; } + if(!((populate_fpt)populate)(big_set, (uint32_t)size, filter)) { return false; } for (size_t i = 0; i < size; i++) { - if (!contain(big_set[i], filter)) { + if (!((contain_fpt)contain)(big_set[i], filter)) { printf("bug!\n"); return false; } } - size_t buffer_size = serialization_bytes(filter); + size_t buffer_size = ((serialization_bytes_fpt)serialization_bytes)(filter); char *buffer = (char*)malloc(buffer_size); - serialize(filter, buffer); - free_filter(filter); - deserialize(filter, buffer); + ((serialize_fpt)serialize)(filter, buffer); + ((free_fpt)free_filter)(filter); + ((deserialize_fpt)deserialize)(filter, buffer); free(buffer); for (size_t i = 0; i < size; i++) { - if (!contain(big_set[i], filter)) { + if (!((contain_fpt)contain)(big_set[i], filter)) { printf("bug!\n"); return false; } @@ -101,7 +63,7 @@ bool test(size_t size, size_t repeated_size, void *filter, size_t trials = 10000000; for (size_t i = 0; i < trials; i++) { uint64_t random_key = ((uint64_t)rand() << 32U) + (uint64_t)rand(); - if (contain(random_key, filter)) { + if (((contain_fpt)contain)(random_key, filter)) { if (random_key >= size) { random_matches++; } @@ -109,135 +71,102 @@ bool test(size_t size, size_t repeated_size, void *filter, } double fpp = (double)random_matches * 1.0 / (double)trials; printf(" fpp %3.5f (estimated) \n", fpp); - double bpe = (double)size_in_bytes(filter) * 8.0 / (double)size; + double bpe = (double)((size_in_bytes_fpt)size_in_bytes)(filter) * 8.0 / (double)size; printf(" bits per entry %3.2f\n", bpe); printf(" bits per entry %3.2f (theoretical lower bound)\n", - log(fpp)/log(2)); printf(" efficiency ratio %3.3f \n", bpe /(- log(fpp)/log(2))); - free_filter(filter); + ((free_fpt)free_filter)(filter); free(big_set); return true; } bool testbufferedxor8(size_t size) { printf("testing buffered xor8\n"); - xor8_t filter = {0}; // zero initialisation silences unitialized warning + xor8_t filter; return test(size, 0, &filter, - gen_xor8_allocate, - gen_xor8_free, - gen_xor8_size_in_bytes, - gen_xor8_serialization_bytes, - gen_xor8_serialize, - gen_xor8_deserialize, - gen_xor8_buffered_populate, - gen_xor8_contain); + (gfp)xor8_allocate, + (gfp)xor8_free, + (gfp)xor8_size_in_bytes, + (gfp)xor8_serialization_bytes, + (gfp)xor8_serialize, + (gfp)xor8_deserialize, + (gfp)xor8_buffered_populate, + (gfp)xor8_contain); } bool testxor8(size_t size) { printf("testing xor8\n"); - - xor8_t filter = {0}; // zero initialisation silences unitialized warning + xor8_t filter; return test(size, 0, &filter, - gen_xor8_allocate, - gen_xor8_free, - gen_xor8_size_in_bytes, - gen_xor8_serialization_bytes, - gen_xor8_serialize, - gen_xor8_deserialize, - gen_xor8_populate, - gen_xor8_contain); + (gfp)xor8_allocate, + (gfp)xor8_free, + (gfp)xor8_size_in_bytes, + (gfp)xor8_serialization_bytes, + (gfp)xor8_serialize, + (gfp)xor8_deserialize, + (gfp)xor8_populate, + (gfp)xor8_contain); } bool testxor16(size_t size) { printf("testing xor16\n"); - xor16_t filter = {0}; // zero initialisation silences unitialized warning + xor16_t filter; return test(size, 0, &filter, - gen_xor16_allocate, - gen_xor16_free, - gen_xor16_size_in_bytes, - gen_xor16_serialization_bytes, - gen_xor16_serialize, - gen_xor16_deserialize, - gen_xor16_populate, - gen_xor16_contain); + (gfp)xor16_allocate, + (gfp)xor16_free, + (gfp)xor16_size_in_bytes, + (gfp)xor16_serialization_bytes, + (gfp)xor16_serialize, + (gfp)xor16_deserialize, + (gfp)xor16_populate, + (gfp)xor16_contain); } bool testbufferedxor16(size_t size) { printf("testing buffered xor16\n"); - xor16_t filter = {0}; // zero initialisation silences unitialized warning + xor16_t filter; return test(size, 0, &filter, - gen_xor16_allocate, - gen_xor16_free, - gen_xor16_size_in_bytes, - gen_xor16_serialization_bytes, - gen_xor16_serialize, - gen_xor16_deserialize, - gen_xor16_buffered_populate, - gen_xor16_contain); + (gfp)xor16_allocate, + (gfp)xor16_free, + (gfp)xor16_size_in_bytes, + (gfp)xor16_serialization_bytes, + (gfp)xor16_serialize, + (gfp)xor16_deserialize, + (gfp)xor16_buffered_populate, + (gfp)xor16_contain); } -bool testbinaryfuse8(size_t size) { - printf("testing binary fuse8 with size %zu\n", size); - binary_fuse8_t filter = {0}; // zero initialisation silences unitialized warning - return test(size, 0, &filter, - gen_binary_fuse8_allocate, - gen_binary_fuse8_free, - gen_binary_fuse8_size_in_bytes, - gen_binary_fuse8_serialization_bytes, - gen_binary_fuse8_serialize, - gen_binary_fuse8_deserialize, - gen_binary_fuse8_populate, - gen_binary_fuse8_contain); -} - - - -bool testbinaryfuse16(size_t size) { - printf("testing binary fuse16\n"); - binary_fuse16_t filter = {0}; // zero initialisation silences unitialized warning - return test(size, 0, &filter, - gen_binary_fuse16_allocate, - gen_binary_fuse16_free, - gen_binary_fuse16_size_in_bytes, - gen_binary_fuse16_serialization_bytes, - gen_binary_fuse16_serialize, - gen_binary_fuse16_deserialize, - gen_binary_fuse16_populate, - gen_binary_fuse16_contain); +bool testbinaryfuse8(size_t size, size_t repeated_size) { + printf("testing binary fuse8 with size %zu and %zu duplicates\n", size, repeated_size); + binary_fuse8_t filter; + return test(size, repeated_size, &filter, + (gfp)binary_fuse8_allocate, + (gfp)binary_fuse8_free, + (gfp)binary_fuse8_size_in_bytes, + (gfp)binary_fuse8_serialization_bytes, + (gfp)binary_fuse8_serialize, + (gfp)binary_fuse8_deserialize, + (gfp)binary_fuse8_populate, + (gfp)binary_fuse8_contain); } -bool testbinaryfuse8_dup(size_t size) { - printf("testing binary fuse8 with duplicates\n"); - binary_fuse8_t filter = {0}; // zero initialisation silences unitialized warning - return test(size, 10, &filter, - gen_binary_fuse8_allocate, - gen_binary_fuse8_free, - gen_binary_fuse8_size_in_bytes, - gen_binary_fuse8_serialization_bytes, - gen_binary_fuse8_serialize, - gen_binary_fuse8_deserialize, - gen_binary_fuse8_populate, - gen_binary_fuse8_contain); -} - - - -bool testbinaryfuse16_dup(size_t size) { - printf("testing binary fuse16 with duplicates\n"); - binary_fuse16_t filter = {0}; // zero initialisation silences unitialized warning - return test(size, 10, &filter, - gen_binary_fuse16_allocate, - gen_binary_fuse16_free, - gen_binary_fuse16_size_in_bytes, - gen_binary_fuse16_serialization_bytes, - gen_binary_fuse16_serialize, - gen_binary_fuse16_deserialize, - gen_binary_fuse16_populate, - gen_binary_fuse16_contain); +bool testbinaryfuse16(size_t size, size_t repeated_size) { + printf("testing binary fuse16 with size %zu and %zu duplicates\n", size, repeated_size); + binary_fuse16_t filter; + return test(size, repeated_size, &filter, + (gfp)binary_fuse16_allocate, + (gfp)binary_fuse16_free, + (gfp)binary_fuse16_size_in_bytes, + (gfp)binary_fuse16_serialization_bytes, + (gfp)binary_fuse16_serialize, + (gfp)binary_fuse16_deserialize, + (gfp)binary_fuse16_populate, + (gfp)binary_fuse16_contain); } void failure_rate_binary_fuse16() { @@ -267,13 +196,13 @@ int main() { failure_rate_binary_fuse16(); for(size_t size = 1000; size <= 1000000; size *= 300) { printf("== size = %zu \n", size); - if(!testbinaryfuse8(size)) { abort(); } + if(!testbinaryfuse8(size, 0)) { abort(); } printf("\n"); - if(!testbinaryfuse16(size)) { abort(); } + if(!testbinaryfuse16(size, 0)) { abort(); } printf("\n"); - if(!testbinaryfuse8_dup(size)) { abort(); } + if(!testbinaryfuse8(size, 10)) { abort(); } printf("\n"); - if(!testbinaryfuse16_dup(size)) { abort(); } + if(!testbinaryfuse16(size, 10)) { abort(); } printf("\n"); if(!testbufferedxor8(size)) { abort(); } printf("\n"); @@ -287,10 +216,10 @@ int main() { } // test small edge-case binary fuse input sizes - if(!testbinaryfuse8(0)) { abort(); } - if(!testbinaryfuse8(1)) { abort(); } - if(!testbinaryfuse8(2)) { abort(); } - if(!testbinaryfuse16(0)) { abort(); } - if(!testbinaryfuse16(1)) { abort(); } - if(!testbinaryfuse16(2)) { abort(); } + if(!testbinaryfuse8(0, 0)) { abort(); } + if(!testbinaryfuse8(1, 0)) { abort(); } + if(!testbinaryfuse8(2, 0)) { abort(); } + if(!testbinaryfuse16(0, 0)) { abort(); } + if(!testbinaryfuse16(1, 0)) { abort(); } + if(!testbinaryfuse16(2, 0)) { abort(); } }