From 8f35c47d42fe45e06c72d03004a8d3c1db2fbc17 Mon Sep 17 00:00:00 2001 From: ampli Date: Fri, 17 May 2024 01:34:17 +0300 Subject: [PATCH] eliminate_duplicate_disjuncts(): Add collision debug stats --- link-grammar/disjunct-utils.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/link-grammar/disjunct-utils.c b/link-grammar/disjunct-utils.c index 0235dac00..3901d4d70 100644 --- a/link-grammar/disjunct-utils.c +++ b/link-grammar/disjunct-utils.c @@ -333,6 +333,7 @@ static void disjunct_dup_table_delete(disjunct_dup_table *dt) free(dt); } +#define DEDUP_DEBUG 0 /** * Takes the list of disjuncts pointed to by dw, eliminates all * duplicates. The elimination is done in-place. Because the first @@ -354,6 +355,9 @@ unsigned int eliminate_duplicate_disjuncts(Disjunct *dw, bool multi_string) dt = disjunct_dup_table_new(next_power_of_two_up(2 * count_disjuncts(dw))); +#if DEDUP_DEBUG + unsigned int coll = 0; +#endif for (Disjunct *d = dw; d != NULL; d = d->next) { Disjunct *dx; @@ -402,12 +406,28 @@ unsigned int eliminate_duplicate_disjuncts(Disjunct *dw, bool multi_string) } else { +#if DEDUP_DEBUG + if (dt->dup_table[h]) coll++; +#endif d->dup_table_next = dt->dup_table[h]; dt->dup_table[h] = d; prev = d; } } +#if DEDUP_DEBUG +#if 1 + // For particular words only. + unsigned int pw[] = { 2, 7, 12, 22, 34, 46 , 0}; + for (int i = 0; pw[i] != 0; i++) + if (dw->originating_gword->o_gword->sent_wordidx == pw[i]) +#endif + { + fprintf(stderr, "edd: %.2f%% coll %u/%u\n", + 100.f * coll / count_disjuncts(dw), coll, count_disjuncts(dw)); + } +#endif + lgdebug(+D_DISJ+(0==count)*1024, "w%zu: Killed %u duplicates%s\n", dw->originating_gword == NULL ? 0 : dw->originating_gword->o_gword->sent_wordidx, count,