From eb8a6b179ad83d787930a560a83ba8853a05d2aa Mon Sep 17 00:00:00 2001 From: Arun Date: Sat, 21 Jan 2023 10:34:00 +0530 Subject: [PATCH 1/2] Use uint8_t * instead of char * --- src/bloom.c | 24 ++++++++++++------------ src/bloom.h | 8 ++++---- tests/bloom_test.c | 26 +++++++++++++------------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/bloom.c b/src/bloom.c index 1e94ef4..f4ec9fd 100644 --- a/src/bloom.c +++ b/src/bloom.c @@ -42,8 +42,8 @@ static const unsigned char bits_set_table[256] = {B6(0), B6(1), B6(1), B6(2)}; /******************************************************************************* *** PRIVATE FUNCTIONS *******************************************************************************/ -static uint64_t* __default_hash(int num_hashes, const char *str); -static uint64_t __fnv_1a(const char *key, int seed); +static uint64_t* __default_hash(int num_hashes, const uint8_t *str, const size_t str_len); +static uint64_t __fnv_1a(const uint8_t *key, const size_t key_len, int seed); static void __calculate_optimal_hashes(BloomFilter *bf); static void __read_from_file(BloomFilter *bf, FILE *fp, short on_disk, const char *filename); static void __write_to_file(BloomFilter *bf, FILE *fp, short on_disk); @@ -142,23 +142,23 @@ void bloom_filter_stats(BloomFilter *bf) { bloom_filter_count_set_bits(bf), is_on_disk); } -int bloom_filter_add_string(BloomFilter *bf, const char *str) { - uint64_t *hashes = bloom_filter_calculate_hashes(bf, str, bf->number_hashes); +int bloom_filter_add_string(BloomFilter *bf, const uint8_t *str, const size_t str_len) { + uint64_t *hashes = bloom_filter_calculate_hashes(bf, str, str_len, bf->number_hashes); int res = bloom_filter_add_string_alt(bf, hashes, bf->number_hashes); free(hashes); return res; } -int bloom_filter_check_string(BloomFilter *bf, const char *str) { - uint64_t *hashes = bloom_filter_calculate_hashes(bf, str, bf->number_hashes); +int bloom_filter_check_string(BloomFilter *bf, const uint8_t *str, const size_t str_len) { + uint64_t *hashes = bloom_filter_calculate_hashes(bf, str, str_len, bf->number_hashes); int res = bloom_filter_check_string_alt(bf, hashes, bf->number_hashes); free(hashes); return res; } -uint64_t* bloom_filter_calculate_hashes(BloomFilter *bf, const char *str, unsigned int number_hashes) { - return bf->hash_function(number_hashes, str); +uint64_t* bloom_filter_calculate_hashes(BloomFilter *bf, const uint8_t *str, const size_t str_len, unsigned int number_hashes) { + return bf->hash_function(number_hashes, str, str_len); } /* Add a string to a bloom filter using the defined hashes */ @@ -487,18 +487,18 @@ static void __update_elements_added_on_disk(BloomFilter* bf) { } /* NOTE: The caller will free the results */ -static uint64_t* __default_hash(int num_hashes, const char *str) { +static uint64_t* __default_hash(int num_hashes, const uint8_t *str, const size_t str_len) { uint64_t *results = (uint64_t*)calloc(num_hashes, sizeof(uint64_t)); int i; for (i = 0; i < num_hashes; ++i) { - results[i] = __fnv_1a(str, i); + results[i] = __fnv_1a(str, str_len, i); } return results; } -static uint64_t __fnv_1a(const char *key, int seed) { +static uint64_t __fnv_1a(const uint8_t *key, const size_t len, int seed) { // FNV-1a hash (http://www.isthe.com/chongo/tech/comp/fnv/) - int i, len = strlen(key); + size_t i; uint64_t h = 14695981039346656037ULL + (31 * seed); // FNV_OFFSET 64 bit with magic number seed for (i = 0; i < len; ++i){ h = h ^ (unsigned char) key[i]; diff --git a/src/bloom.h b/src/bloom.h index 8549988..31acdf7 100644 --- a/src/bloom.h +++ b/src/bloom.h @@ -39,7 +39,7 @@ extern "C" { #define bloom_filter_get_version() (BLOOMFILTER_VERSION) -typedef uint64_t* (*BloomHashFunction) (int num_hashes, const char *str); +typedef uint64_t* (*BloomHashFunction) (int num_hashes, const uint8_t *str, const size_t str_len); typedef struct bloom_filter { /* bloom parameters */ @@ -114,13 +114,13 @@ int bloom_filter_destroy(BloomFilter *bf); int bloom_filter_clear(BloomFilter *bf); /* Add a string (or element) to the bloom filter */ -int bloom_filter_add_string(BloomFilter *bf, const char *str); +int bloom_filter_add_string(BloomFilter *bf, const uint8_t *str, const size_t str_len); /* Add a string to a bloom filter using the defined hashes */ int bloom_filter_add_string_alt(BloomFilter *bf, uint64_t *hashes, unsigned int number_hashes_passed); /* Check to see if a string (or element) is or is not in the bloom filter */ -int bloom_filter_check_string(BloomFilter *bf, const char *str); +int bloom_filter_check_string(BloomFilter *bf, const uint8_t *str, const size_t str_len); /* Check if a string is in the bloom filter using the passed hashes */ int bloom_filter_check_string_alt(BloomFilter *bf, uint64_t *hashes, unsigned int number_hashes_passed); @@ -144,7 +144,7 @@ void bloom_filter_set_elements_to_estimated(BloomFilter *bf); /* Generate the desired number of hashes for the provided string NOTE: It is up to the caller to free the allocated memory */ -uint64_t* bloom_filter_calculate_hashes(BloomFilter *bf, const char *str, unsigned int number_hashes); +uint64_t* bloom_filter_calculate_hashes(BloomFilter *bf, const uint8_t *str, const size_t str_len, unsigned int number_hashes); /* Calculate the size the bloom filter will take on disk when exported in bytes */ uint64_t bloom_filter_export_size(BloomFilter *bf); diff --git a/tests/bloom_test.c b/tests/bloom_test.c index f430e59..901df44 100644 --- a/tests/bloom_test.c +++ b/tests/bloom_test.c @@ -28,8 +28,8 @@ int check_unknown_values_alt(BloomFilter *bf, int mult, int mult2, int offset, i int check_unknown_values_alt_2(BloomFilter *bf, int mult, int mult2, int offset, int* used); void success_or_failure(int res); void populate_bloom_filter(BloomFilter *bf, unsigned long long elements, int mult); -static uint64_t __fnv_1a_mod(const char *key); -static uint64_t* __default_hash_mod(int num_hashes, const char *str); +static uint64_t __fnv_1a_mod(const uint8_t *key, const size_t str_len); +static uint64_t* __default_hash_mod(int num_hashes, const uint8_t *str, const size_t str_len); @@ -354,7 +354,7 @@ void populate_bloom_filter(BloomFilter *bf, unsigned long long elements, int mul for (unsigned long long i = 0; i < elements * mult; i+=mult) { char key[KEY_LEN] = {0}; sprintf(key, "%llu", i); - bloom_filter_add_string(bf, key); + bloom_filter_add_string(bf, (const uint8_t *) key, strlen(key)); } } @@ -363,7 +363,7 @@ int check_known_values(BloomFilter *bf, int mult) { for (i = 0; i < ELEMENTS * mult; i+=mult) { char key[KEY_LEN] = {0}; sprintf(key, "%d", i); - if (bloom_filter_check_string(bf, key) == BLOOM_FAILURE) { + if (bloom_filter_check_string(bf, (const uint8_t *) key, strlen(key)) == BLOOM_FAILURE) { cnt++; } } @@ -377,7 +377,7 @@ int check_known_values_alt(BloomFilter *bf, int mult, int mult2, int* used) { if (i % mult2 == 0 && i % mult == 0) { char key[KEY_LEN] = {0}; sprintf(key, "%d", i); - if (bloom_filter_check_string(bf, key) == BLOOM_FAILURE) { + if (bloom_filter_check_string(bf, (const uint8_t *) key, strlen(key)) == BLOOM_FAILURE) { cnt++; } j++; @@ -392,7 +392,7 @@ int check_unknown_values(BloomFilter *bf, int mult) { for (i = 1; i < ELEMENTS * mult; i+=mult) { char key[KEY_LEN] = {0}; sprintf(key, "%d", i); - if (bloom_filter_check_string(bf, key) == BLOOM_SUCCESS) { + if (bloom_filter_check_string(bf, (const uint8_t *) key, strlen(key)) == BLOOM_SUCCESS) { cnt++; } } @@ -408,7 +408,7 @@ int check_unknown_values_alt(BloomFilter *bf, int mult, int mult2, int offset, i } else { char key[KEY_LEN] = {0}; sprintf(key, "%d", i); - if (bloom_filter_check_string(bf, key) == BLOOM_SUCCESS) { + if (bloom_filter_check_string(bf, (const uint8_t *) key, strlen(key)) == BLOOM_SUCCESS) { cnt++; } j++; @@ -427,7 +427,7 @@ int check_unknown_values_alt_2(BloomFilter *bf, int mult, int mult2, int offset, } else { char key[KEY_LEN] = {0}; sprintf(key, "%d", i); - if (bloom_filter_check_string(bf, key) == BLOOM_SUCCESS) { + if (bloom_filter_check_string(bf, (const uint8_t *) key, strlen(key)) == BLOOM_SUCCESS) { cnt++; } j++; @@ -446,22 +446,22 @@ void success_or_failure(int res) { } /* NOTE: The caller will free the results */ -static uint64_t* __default_hash_mod(int num_hashes, const char *str) { +static uint64_t* __default_hash_mod(int num_hashes, const uint8_t *str, const size_t str_len) { uint64_t *results = (uint64_t*)calloc(num_hashes, sizeof(uint64_t)); int i; char *key = (char*)calloc(17, sizeof(char)); // largest value is 7FFF,FFFF,FFFF,FFFF - results[0] = __fnv_1a_mod(str); + results[0] = __fnv_1a_mod(str, str_len); for (i = 1; i < num_hashes; ++i) { sprintf(key, "%" PRIx64 "", results[i-1]); - results[i] = __fnv_1a_mod(key); + results[i] = __fnv_1a_mod((const uint8_t *) key, strlen(key)); } free(key); return results; } -static uint64_t __fnv_1a_mod(const char *key) { +static uint64_t __fnv_1a_mod(const uint8_t *key, const size_t len) { // FNV-1a hash (http://www.isthe.com/chongo/tech/comp/fnv/) - int i, len = strlen(key); + size_t i; uint64_t h = 14695981039346656073ULL; // FNV_OFFSET 64 bit for (i = 0; i < len; ++i) { h = h ^ (unsigned char) key[i]; From 4c07d2ae21d63e88b37765a2e98f0da4967b8330 Mon Sep 17 00:00:00 2001 From: Arun Date: Sun, 12 Feb 2023 18:38:34 +0530 Subject: [PATCH 2/2] Add support for uint8_t strings --- src/bloom.c | 11 +++++++++-- src/bloom.h | 10 ++++++++-- tests/bloom_test.c | 12 ++++++------ 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/bloom.c b/src/bloom.c index f4ec9fd..71d13f5 100644 --- a/src/bloom.c +++ b/src/bloom.c @@ -142,15 +142,22 @@ void bloom_filter_stats(BloomFilter *bf) { bloom_filter_count_set_bits(bf), is_on_disk); } -int bloom_filter_add_string(BloomFilter *bf, const uint8_t *str, const size_t str_len) { +int bloom_filter_add_string(BloomFilter *bf, const char *str) { + return bloom_filter_add_uint8_str(bf, (const uint8_t *) str, strlen(str)); +} + +int bloom_filter_add_uint8_str(BloomFilter *bf, const uint8_t *str, const size_t str_len) { uint64_t *hashes = bloom_filter_calculate_hashes(bf, str, str_len, bf->number_hashes); int res = bloom_filter_add_string_alt(bf, hashes, bf->number_hashes); free(hashes); return res; } +int bloom_filter_check_string(BloomFilter *bf, const char *str) { + return bloom_filter_check_uint8_str(bf, (const uint8_t *) str, strlen(str)); +} -int bloom_filter_check_string(BloomFilter *bf, const uint8_t *str, const size_t str_len) { +int bloom_filter_check_uint8_str(BloomFilter *bf, const uint8_t *str, const size_t str_len) { uint64_t *hashes = bloom_filter_calculate_hashes(bf, str, str_len, bf->number_hashes); int res = bloom_filter_check_string_alt(bf, hashes, bf->number_hashes); free(hashes); diff --git a/src/bloom.h b/src/bloom.h index 31acdf7..29c00c9 100644 --- a/src/bloom.h +++ b/src/bloom.h @@ -114,13 +114,19 @@ int bloom_filter_destroy(BloomFilter *bf); int bloom_filter_clear(BloomFilter *bf); /* Add a string (or element) to the bloom filter */ -int bloom_filter_add_string(BloomFilter *bf, const uint8_t *str, const size_t str_len); +int bloom_filter_add_string(BloomFilter *bf, const char *str); + +/* Add a uint8_t string (or element) to the bloom filter */ +int bloom_filter_add_uint8_str(BloomFilter *bf, const uint8_t *str, const size_t str_len); /* Add a string to a bloom filter using the defined hashes */ int bloom_filter_add_string_alt(BloomFilter *bf, uint64_t *hashes, unsigned int number_hashes_passed); /* Check to see if a string (or element) is or is not in the bloom filter */ -int bloom_filter_check_string(BloomFilter *bf, const uint8_t *str, const size_t str_len); +int bloom_filter_check_string(BloomFilter *bf, const char *str); + +/* Check to see if a uint8_t string (or element) is or is not in the bloom filter */ +int bloom_filter_check_uint8_str(BloomFilter *bf, const uint8_t *str, const size_t str_len); /* Check if a string is in the bloom filter using the passed hashes */ int bloom_filter_check_string_alt(BloomFilter *bf, uint64_t *hashes, unsigned int number_hashes_passed); diff --git a/tests/bloom_test.c b/tests/bloom_test.c index 901df44..be428b4 100644 --- a/tests/bloom_test.c +++ b/tests/bloom_test.c @@ -354,7 +354,7 @@ void populate_bloom_filter(BloomFilter *bf, unsigned long long elements, int mul for (unsigned long long i = 0; i < elements * mult; i+=mult) { char key[KEY_LEN] = {0}; sprintf(key, "%llu", i); - bloom_filter_add_string(bf, (const uint8_t *) key, strlen(key)); + bloom_filter_add_string(bf, key); } } @@ -363,7 +363,7 @@ int check_known_values(BloomFilter *bf, int mult) { for (i = 0; i < ELEMENTS * mult; i+=mult) { char key[KEY_LEN] = {0}; sprintf(key, "%d", i); - if (bloom_filter_check_string(bf, (const uint8_t *) key, strlen(key)) == BLOOM_FAILURE) { + if (bloom_filter_check_string(bf, key) == BLOOM_FAILURE) { cnt++; } } @@ -377,7 +377,7 @@ int check_known_values_alt(BloomFilter *bf, int mult, int mult2, int* used) { if (i % mult2 == 0 && i % mult == 0) { char key[KEY_LEN] = {0}; sprintf(key, "%d", i); - if (bloom_filter_check_string(bf, (const uint8_t *) key, strlen(key)) == BLOOM_FAILURE) { + if (bloom_filter_check_string(bf, key) == BLOOM_FAILURE) { cnt++; } j++; @@ -392,7 +392,7 @@ int check_unknown_values(BloomFilter *bf, int mult) { for (i = 1; i < ELEMENTS * mult; i+=mult) { char key[KEY_LEN] = {0}; sprintf(key, "%d", i); - if (bloom_filter_check_string(bf, (const uint8_t *) key, strlen(key)) == BLOOM_SUCCESS) { + if (bloom_filter_check_string(bf, key) == BLOOM_SUCCESS) { cnt++; } } @@ -408,7 +408,7 @@ int check_unknown_values_alt(BloomFilter *bf, int mult, int mult2, int offset, i } else { char key[KEY_LEN] = {0}; sprintf(key, "%d", i); - if (bloom_filter_check_string(bf, (const uint8_t *) key, strlen(key)) == BLOOM_SUCCESS) { + if (bloom_filter_check_string(bf, key) == BLOOM_SUCCESS) { cnt++; } j++; @@ -427,7 +427,7 @@ int check_unknown_values_alt_2(BloomFilter *bf, int mult, int mult2, int offset, } else { char key[KEY_LEN] = {0}; sprintf(key, "%d", i); - if (bloom_filter_check_string(bf, (const uint8_t *) key, strlen(key)) == BLOOM_SUCCESS) { + if (bloom_filter_check_string(bf, key) == BLOOM_SUCCESS) { cnt++; } j++;