Skip to content

Commit

Permalink
eliminate_duplicate_disjuncts(): Add collision debug stats
Browse files Browse the repository at this point in the history
  • Loading branch information
ampli committed May 17, 2024
1 parent 8840a37 commit 8f35c47
Showing 1 changed file with 20 additions and 0 deletions.
20 changes: 20 additions & 0 deletions link-grammar/disjunct-utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,7 @@ static void disjunct_dup_table_delete(disjunct_dup_table *dt)
free(dt);
}

#define DEDUP_DEBUG 0
/**
* Takes the list of disjuncts pointed to by dw and eliminates all
* duplicates. The elimination is done in-place. Because the first
Expand All @@ -354,6 +355,9 @@ unsigned int eliminate_duplicate_disjuncts(Disjunct *dw, bool multi_string)

dt = disjunct_dup_table_new(next_power_of_two_up(2 * count_disjuncts(dw)));

#if DEDUP_DEBUG
unsigned int coll = 0;
#endif
for (Disjunct *d = dw; d != NULL; d = d->next)
{
Disjunct *dx;
Expand Down Expand Up @@ -402,12 +406,28 @@ unsigned int eliminate_duplicate_disjuncts(Disjunct *dw, bool multi_string)
}
else
{
#if DEDUP_DEBUG
if (dt->dup_table[h]) coll++;
#endif
d->dup_table_next = dt->dup_table[h];
dt->dup_table[h] = d;
prev = d;
}
}

#if DEDUP_DEBUG
#if 1
// Print the stats only for words at these sentence indices (0-terminated list).
unsigned int pw[] = { 2, 7, 12, 22, 34, 46 , 0};
for (int i = 0; pw[i] != 0; i++)
if (dw->originating_gword->o_gword->sent_wordidx == pw[i])
#endif
{
fprintf(stderr, "edd: %.2f%% coll %u/%u\n",
100.f * coll / count_disjuncts(dw), coll, count_disjuncts(dw));
}
#endif

lgdebug(+D_DISJ+(0==count)*1024, "w%zu: Killed %u duplicates%s\n",
dw->originating_gword == NULL ? 0 :
dw->originating_gword->o_gword->sent_wordidx, count,
Expand Down

0 comments on commit 8f35c47

Please sign in to comment.