Skip to content

Commit

Permalink
eliminate_duplicate_disjuncts(), tracon-set.c: Use Fibonacci Hashing
Browse files Browse the repository at this point in the history
Knuth's Method / Fibonacci Hashing
  • Loading branch information
ampli committed May 17, 2024
1 parent 8f35c47 commit d325e3e
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 5 deletions.
3 changes: 2 additions & 1 deletion link-grammar/connectors.h
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ static inline uint32_t string_hash(const char *s)
}

typedef uint32_t connector_hash_t;
static const connector_hash_t FIBONACCI_MULT = 0x9E3779B9;

static inline connector_hash_t connector_hash(const Connector *c)
{
Expand All @@ -351,7 +352,7 @@ static inline connector_hash_t connector_list_hash(const Connector *c)

for (c = c->next; c != NULL; c = c->next)
#if FEEDBACK_HASH
accum = (accum<<6) + (accum<<16) + (accum >> 16) - connector_hash(c);
accum = (accum<<7) + (accum<<14) + (accum >> 16) - connector_hash(c);
#else
// Bad.
accum = (19 * accum) + connector_hash(c);
Expand Down
10 changes: 7 additions & 3 deletions link-grammar/disjunct-utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,8 @@ static unsigned int count_connectors(Sentence sent)
typedef struct disjunct_dup_table_s disjunct_dup_table;
struct disjunct_dup_table_s
{
size_t dup_table_size;
unsigned int dup_table_size;
unsigned int log2_size;
Disjunct *dup_table[];
};

Expand All @@ -244,10 +245,12 @@ static inline unsigned int old_hash_disjunct(disjunct_dup_table *dt,
i += 19 * connector_list_hash(d->right);
if (string_too)
i += string_hash(d->word_string);
//i += (i>>10);

d->dup_hash = i;
return (i & (dt->dup_table_size-1));

i *= FIBONACCI_MULT;
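// Fold the top log2(table_size) bits of the product into the low bits
// before masking. NOTE(review): presumably log2_size >= 1 here; a shift
// by the full width of i would be undefined - confirm against callers.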
return ((i ^ (i>>(32-dt->log2_size))) & (dt->dup_table_size-1));
}

/**
Expand Down Expand Up @@ -322,6 +325,7 @@ static disjunct_dup_table * disjunct_dup_table_new(size_t sz)

dt = malloc(sz * sizeof(Disjunct *) + sizeof(disjunct_dup_table));
dt->dup_table_size = sz;
dt->log2_size = power_of_2_log2(sz);

memset(dt->dup_table, 0, sz * sizeof(Disjunct *));

Expand Down
2 changes: 1 addition & 1 deletion link-grammar/tracon-set.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ static tid_hash_t hash_connectors(const Connector *c, unsigned int shallow)
{
tid_hash_t accum = (shallow && c->shallow) ? 1000003 : 0;

return accum + connector_list_hash(c);
return (accum + connector_list_hash(c)) * FIBONACCI_MULT;
}

#if 0
Expand Down
12 changes: 12 additions & 0 deletions link-grammar/utilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -534,4 +534,16 @@ static inline size_t next_power_of_two_up(size_t i)
return j;
}

/**
 * Compute the base-2 logarithm of \p i, which is expected to be a power of 2.
 *
 * The result is the number of single-bit right shifts needed to bring
 * \p i down to 1. For an input that is not a power of 2 this yields the
 * floor of its log2, and an input of 0 yields 0.
 *
 * @param i The value whose log2 is wanted.
 * @return The bit position of the highest set bit of \p i (0 if none).
 */
static inline unsigned int power_of_2_log2(size_t i)
{
	unsigned int shifts;

	/* Each halving that still leaves a value above 1 adds one to the log. */
	for (shifts = 0; i > 1; i >>= 1)
		shifts++;

	return shifts;
}


#endif /* _LINK_GRAMMAR_UTILITIES_H_ */

0 comments on commit d325e3e

Please sign in to comment.