Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for uint8_t * strings #26

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 18 additions & 11 deletions src/bloom.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ static const unsigned char bits_set_table[256] = {B6(0), B6(1), B6(1), B6(2)};
/*******************************************************************************
*** PRIVATE FUNCTIONS
*******************************************************************************/
/* Hash helpers take a raw byte buffer plus an explicit length (not a
   NUL-terminated string), so keys containing embedded zero bytes can be hashed. */
static uint64_t* __default_hash(int num_hashes, const uint8_t *str, const size_t str_len);
static uint64_t __fnv_1a(const uint8_t *key, const size_t key_len, int seed);
/* Internal helpers for sizing the filter and for file import/export. */
static void __calculate_optimal_hashes(BloomFilter *bf);
static void __read_from_file(BloomFilter *bf, FILE *fp, short on_disk, const char *filename);
static void __write_to_file(BloomFilter *bf, FILE *fp, short on_disk);
Expand Down Expand Up @@ -143,22 +143,29 @@ void bloom_filter_stats(BloomFilter *bf) {
}

/* Add a NUL-terminated string to the bloom filter.
   The string's bytes (excluding the terminator, length taken with strlen)
   are forwarded to bloom_filter_add_uint8_str, whose result is returned.
   No hash array is allocated here, so there is nothing to free. */
int bloom_filter_add_string(BloomFilter *bf, const char *str) {
    return bloom_filter_add_uint8_str(bf, (const uint8_t *) str, strlen(str));
}

/* Add the str_len bytes starting at str to the bloom filter.
   Computes bf->number_hashes hashes for the buffer, inserts them through
   bloom_filter_add_string_alt, releases the temporary hash array and
   returns the insertion result. */
int bloom_filter_add_uint8_str(BloomFilter *bf, const uint8_t *str, const size_t str_len) {
    uint64_t *digests;
    int status;

    digests = bloom_filter_calculate_hashes(bf, str, str_len, bf->number_hashes);
    status = bloom_filter_add_string_alt(bf, digests, bf->number_hashes);

    /* the hash array is owned by this function once calculated */
    free(digests);
    return status;
}


/* Check whether a NUL-terminated string is in the bloom filter.
   The string's bytes (excluding the terminator, length taken with strlen)
   are forwarded to bloom_filter_check_uint8_str, whose result is returned.
   No hash array is allocated here, so there is nothing to free. */
int bloom_filter_check_string(BloomFilter *bf, const char *str) {
    return bloom_filter_check_uint8_str(bf, (const uint8_t *) str, strlen(str));
}

/* Check whether the str_len bytes starting at str are in the bloom filter.
   Computes bf->number_hashes hashes for the buffer, looks them up through
   bloom_filter_check_string_alt, releases the temporary hash array and
   returns the lookup result. */
int bloom_filter_check_uint8_str(BloomFilter *bf, const uint8_t *str, const size_t str_len) {
    uint64_t *digests;
    int status;

    digests = bloom_filter_calculate_hashes(bf, str, str_len, bf->number_hashes);
    status = bloom_filter_check_string_alt(bf, digests, bf->number_hashes);

    /* the hash array is owned by this function once calculated */
    free(digests);
    return status;
}

/* Generate number_hashes hashes for the str_len bytes starting at str by
   delegating to the filter's configured hash function (bf->hash_function).
   The array returned by that function is handed back unchanged; the callers
   in this file free it after use. */
uint64_t* bloom_filter_calculate_hashes(BloomFilter *bf, const uint8_t *str, const size_t str_len, unsigned int number_hashes) {
    return bf->hash_function(number_hashes, str, str_len);
}

/* Add a string to a bloom filter using the defined hashes */
Expand Down Expand Up @@ -487,18 +494,18 @@ static void __update_elements_added_on_disk(BloomFilter* bf) {
}

/* NOTE: The caller will free the results */
/* Default hash generator: fills a freshly calloc'd array of num_hashes
   values, where entry i is __fnv_1a over the str_len bytes at str with
   seed i. The array is returned to the caller. */
static uint64_t* __default_hash(int num_hashes, const uint8_t *str, const size_t str_len) {
    uint64_t *results = (uint64_t*)calloc(num_hashes, sizeof(uint64_t));
    int i;
    for (i = 0; i < num_hashes; ++i) {
        /* the loop index doubles as the per-hash seed */
        results[i] = __fnv_1a(str, str_len, i);
    }
    return results;
}

static uint64_t __fnv_1a(const char *key, int seed) {
static uint64_t __fnv_1a(const uint8_t *key, const size_t len, int seed) {
// FNV-1a hash (http://www.isthe.com/chongo/tech/comp/fnv/)
int i, len = strlen(key);
size_t i;
uint64_t h = 14695981039346656037ULL + (31 * seed); // FNV_OFFSET 64 bit with magic number seed
for (i = 0; i < len; ++i){
h = h ^ (unsigned char) key[i];
Expand Down
10 changes: 8 additions & 2 deletions src/bloom.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ extern "C" {

#define bloom_filter_get_version() (BLOOMFILTER_VERSION)

/* Signature of a pluggable hash generator: given the str_len bytes at str,
   return an array of num_hashes 64-bit hashes. The key is a byte buffer with
   an explicit length rather than a NUL-terminated string. */
typedef uint64_t* (*BloomHashFunction) (int num_hashes, const uint8_t *str, const size_t str_len);

typedef struct bloom_filter {
/* bloom parameters */
Expand Down Expand Up @@ -116,12 +116,18 @@ int bloom_filter_clear(BloomFilter *bf);
/* Add a NUL-terminated string (or element) to the bloom filter.
   The length is taken with strlen and the bytes are forwarded to
   bloom_filter_add_uint8_str; its result is returned. */
int bloom_filter_add_string(BloomFilter *bf, const char *str);

/* Add a uint8_t string (or element) of str_len bytes to the bloom filter.
   The length is explicit, so no NUL terminator is needed.
   Returns the result of bloom_filter_add_string_alt. */
int bloom_filter_add_uint8_str(BloomFilter *bf, const uint8_t *str, const size_t str_len);

/* Add a string to a bloom filter using the defined hashes
   (hashes holds number_hashes_passed precomputed values) */
int bloom_filter_add_string_alt(BloomFilter *bf, uint64_t *hashes, unsigned int number_hashes_passed);

/* Check to see if a NUL-terminated string (or element) is or is not in the
   bloom filter. The length is taken with strlen and the bytes are forwarded
   to bloom_filter_check_uint8_str; its result is returned. */
int bloom_filter_check_string(BloomFilter *bf, const char *str);

/* Check to see if a uint8_t string (or element) of str_len bytes is or is
   not in the bloom filter. The length is explicit, so no NUL terminator is
   needed. Returns the result of bloom_filter_check_string_alt. */
int bloom_filter_check_uint8_str(BloomFilter *bf, const uint8_t *str, const size_t str_len);

/* Check if a string is in the bloom filter using the passed hashes
   (hashes holds number_hashes_passed precomputed values) */
int bloom_filter_check_string_alt(BloomFilter *bf, uint64_t *hashes, unsigned int number_hashes_passed);

Expand All @@ -144,7 +150,7 @@ void bloom_filter_set_elements_to_estimated(BloomFilter *bf);

/* Generate the desired number of hashes for the provided byte string
   (str_len bytes starting at str; no NUL terminator required)
   NOTE: It is up to the caller to free the allocated memory */
uint64_t* bloom_filter_calculate_hashes(BloomFilter *bf, const uint8_t *str, const size_t str_len, unsigned int number_hashes);

/* Calculate the size the bloom filter will take on disk when exported in bytes */
uint64_t bloom_filter_export_size(BloomFilter *bf);
Expand Down
14 changes: 7 additions & 7 deletions tests/bloom_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ int check_unknown_values_alt(BloomFilter *bf, int mult, int mult2, int offset, i
int check_unknown_values_alt_2(BloomFilter *bf, int mult, int mult2, int offset, int* used);
void success_or_failure(int res);
void populate_bloom_filter(BloomFilter *bf, unsigned long long elements, int mult);
/* Alternate hash helpers used by the tests; they take a byte buffer plus an
   explicit length so they match the BloomHashFunction signature. */
static uint64_t __fnv_1a_mod(const uint8_t *key, const size_t str_len);
static uint64_t* __default_hash_mod(int num_hashes, const uint8_t *str, const size_t str_len);



Expand Down Expand Up @@ -446,22 +446,22 @@ void success_or_failure(int res) {
}

/* NOTE: The caller will free the results */
/* Alternate hash generator for the tests: the first hash is __fnv_1a_mod of
   the str_len input bytes; every later hash is __fnv_1a_mod of the previous
   hash rendered as lowercase hex text. Returns a calloc'd array of
   num_hashes values (NULL if the allocation fails). */
static uint64_t* __default_hash_mod(int num_hashes, const uint8_t *str, const size_t str_len) {
    uint64_t *results = (uint64_t*)calloc(num_hashes, sizeof(uint64_t));
    char key[17]; /* up to 16 hex digits of a uint64_t plus the terminating NUL */
    int i;

    /* nothing to fill in: avoid writing results[0] into an empty or missing array */
    if (results == NULL || num_hashes <= 0) {
        return results;
    }

    results[0] = __fnv_1a_mod(str, str_len);
    for (i = 1; i < num_hashes; ++i) {
        /* chain: hash the hex text of the previous hash */
        snprintf(key, sizeof key, "%" PRIx64 "", results[i-1]);
        results[i] = __fnv_1a_mod((const uint8_t *) key, strlen(key));
    }
    return results;
}

static uint64_t __fnv_1a_mod(const char *key) {
static uint64_t __fnv_1a_mod(const uint8_t *key, const size_t len) {
// FNV-1a hash (http://www.isthe.com/chongo/tech/comp/fnv/)
int i, len = strlen(key);
size_t i;
uint64_t h = 14695981039346656073ULL; // FNV_OFFSET 64 bit
for (i = 0; i < len; ++i) {
h = h ^ (unsigned char) key[i];
Expand Down