From d325e3ee9eae7dd161b61b16509a25dbafaa77eb Mon Sep 17 00:00:00 2001 From: ampli Date: Fri, 17 May 2024 01:42:22 +0300 Subject: [PATCH] eliminate_duplicate_disjuncts(), tracon-set.c: Use Fibonacci Hashing Knuth's Method / Fibonacci Hashing --- link-grammar/connectors.h | 3 ++- link-grammar/disjunct-utils.c | 10 +++++++--- link-grammar/tracon-set.c | 2 +- link-grammar/utilities.h | 12 ++++++++++++ 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/link-grammar/connectors.h b/link-grammar/connectors.h index 0a0177c475..4b534f16f7 100644 --- a/link-grammar/connectors.h +++ b/link-grammar/connectors.h @@ -329,6 +329,7 @@ static inline uint32_t string_hash(const char *s) } typedef uint32_t connector_hash_t; +static const connector_hash_t FIBONACCI_MULT = 0x9E3779B9; static inline connector_hash_t connector_hash(const Connector *c) { @@ -351,7 +352,7 @@ static inline connector_hash_t connector_list_hash(const Connector *c) for (c = c->next; c != NULL; c = c->next) #if FEEDBACK_HASH - accum = (accum<<6) + (accum<<16) + (accum >> 16) - connector_hash(c); + accum = (accum<<7) + (accum<<14) + (accum >> 16) - connector_hash(c); #else // Bad. accum = (19 * accum) + connector_hash(c); diff --git a/link-grammar/disjunct-utils.c b/link-grammar/disjunct-utils.c index 3901d4d708..fa80b8c61d 100644 --- a/link-grammar/disjunct-utils.c +++ b/link-grammar/disjunct-utils.c @@ -223,7 +223,8 @@ static unsigned int count_connectors(Sentence sent) typedef struct disjunct_dup_table_s disjunct_dup_table; struct disjunct_dup_table_s { - size_t dup_table_size; + unsigned int dup_table_size; + unsigned int log2_size; Disjunct *dup_table[]; }; @@ -244,10 +245,12 @@ static inline unsigned int old_hash_disjunct(disjunct_dup_table *dt, i += 19 * connector_list_hash(d->right); if (string_too) i += string_hash(d->word_string); - //i += (i>>10); d->dup_hash = i; - return (i & (dt->dup_table_size-1)); + + i *= FIBONACCI_MULT; + // Feed back log2(table_size) MSBs. + return ((i ^ (i>>(32-dt->log2_size))) & (dt->dup_table_size-1)); } /** @@ -322,6 +325,7 @@ static disjunct_dup_table * disjunct_dup_table_new(size_t sz) dt = malloc(sz * sizeof(Disjunct *) + sizeof(disjunct_dup_table)); dt->dup_table_size = sz; + dt->log2_size = power_of_2_log2(sz); memset(dt->dup_table, 0, sz * sizeof(Disjunct *)); diff --git a/link-grammar/tracon-set.c b/link-grammar/tracon-set.c index ef530b6705..a30afb3c8e 100644 --- a/link-grammar/tracon-set.c +++ b/link-grammar/tracon-set.c @@ -52,7 +52,7 @@ static tid_hash_t hash_connectors(const Connector *c, unsigned int shallow) { tid_hash_t accum = (shallow && c->shallow) ? 1000003 : 0; - return accum + connector_list_hash(c); + return (accum + connector_list_hash(c)) * FIBONACCI_MULT; } #if 0 diff --git a/link-grammar/utilities.h b/link-grammar/utilities.h index f03b488292..b5b255f246 100644 --- a/link-grammar/utilities.h +++ b/link-grammar/utilities.h @@ -534,4 +534,16 @@ static inline size_t next_power_of_two_up(size_t i) return j; } +/** + * Return log2 of a given power-of-2 \p i. + */ +static inline unsigned int power_of_2_log2(size_t i) +{ + unsigned int n = 0; + while (i >>= 1) + n++; + return n; +} + + #endif /* _LINK_GRAMMAR_UTILITIES_H_ */