diff --git a/link-grammar/connectors.h b/link-grammar/connectors.h
index 365830716..b46aca173 100644
--- a/link-grammar/connectors.h
+++ b/link-grammar/connectors.h
@@ -313,10 +313,25 @@ typedef uint32_t connector_hash_t;
 
 static inline connector_hash_t connector_hash(const Connector *c)
 {
+	// Including (c->desc->lc_mask & 1) in the hash is important; see
+	// pull request #1487 for details. Two forms are provided, selected
+	// by SIMPLE_HASH; the author measured no elapsed-time difference
+	// between them and did not examine the quality of the distribution.
+	// The second form multiplies lc_letters by 101, and multi and the
+	// lc_mask bit by sums of powers of 2 that are stated to be prime:
+	//   266281 == 1 + 8 + 32 + 4096 + 256*1024
+	//   524429 == 1 + 4 + 8 + 128 + 512*1024
+#ifdef SIMPLE_HASH
 	return c->desc->uc_num +
 	   (c->multi << 19) +
 	   (((connector_hash_t)c->desc->lc_mask & 1) << 20) +
 	   (connector_hash_t)c->desc->lc_letters;
+#else
+	return c->desc->uc_num +
+	   c->multi * 266281 +
+	   (((connector_hash_t)c->desc->lc_mask & 1) * 524429) +
+	   ((connector_hash_t)c->desc->lc_letters) * 101;
+#endif
 }
 
 /**