From 43546e022a73936c42c74ce88d3a139b117202ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linas=20Vep=C5=A1tas?= Date: Tue, 16 Apr 2024 13:09:39 -0500 Subject: [PATCH] Add commentary about the hash function --- link-grammar/connectors.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/link-grammar/connectors.h b/link-grammar/connectors.h index 365830716..b46aca173 100644 --- a/link-grammar/connectors.h +++ b/link-grammar/connectors.h @@ -313,10 +313,25 @@ typedef uint32_t connector_hash_t; static inline connector_hash_t connector_hash(const Connector *c) { + // The use of (c->desc->lc_mask & 1) during hashing is important; + // see pull req #1487 for details. This raises other questions + // about hashing. Two forms are attempted below. They appear to + // be equivalent in terms of measured elapsed-time performance. + // (I did not look at the quality of the distribution.) + // The second form mixes by multiplying with sums of powers of 2: + // 266281 == 1 + 8 + 32 + 4096 + 256*1024; it is a prime number. + // 524429 == 1 + 4 + 8 + 128 + 512*1024; it is a prime number. +#ifdef SIMPLE_HASH return c->desc->uc_num + (c->multi << 19) + (((connector_hash_t)c->desc->lc_mask & 1) << 20) + (connector_hash_t)c->desc->lc_letters; +#else + return c->desc->uc_num + + c->multi * 266281 + + (((connector_hash_t)c->desc->lc_mask & 1) * 524429) + + ((connector_hash_t)c->desc->lc_letters) * 101; +#endif } /**