Skip to content

Commit

Permalink
Merge pull request #1440 from ampli/parse-set
Browse files Browse the repository at this point in the history
Parse_set / Parse_choice shrinking
  • Loading branch information
linas authored Feb 20, 2023
2 parents 7822478 + 47267d1 commit 8a290fd
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 61 deletions.
8 changes: 8 additions & 0 deletions link-grammar/connectors.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@
*/
#define MAX_SENTENCE 254 /* Maximum number of words in a sentence */

/* Since tracon IDs are unique per sentence, for convenience NULL
* connectors (zero-length tracons) have tracon IDs equal to the word
* number on which their disjunct resides. To that end an initial block
* of IDs is reserved. The size of this block is the maximum number of
* words in a sentence, rounded up to a power of 2.
*/
#define NULL_TRACON_BLOCK 256

/* For faster comparisons, the connector lc part is encoded into a number
* and a mask. Each letter is encoded using LC_BITS bits. With 7 bits, it
* is possible to encode up to 9 letters in an uint64_t. */
Expand Down
8 changes: 1 addition & 7 deletions link-grammar/disjunct-utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -806,12 +806,6 @@ static Disjunct *pack_disjuncts(Sentence sent, Tracon_sharing *ts,

#define TLSZ 8192 /* Initial size of the tracon list table */

/* Reserved tracon ID space for NULL connectors (zero-length tracons).
* Currently, tracons are unique per word. So this is actually the max.
* number of words in a sentence rounded up to a power of 2.
* FIXME: Derive it from MAX_SENTENCE. */
#define WORD_OFFSET 256

/** Create a context descriptor for disjuncts & connector memory "packing".
* Allocate a memory block for all the disjuncts & connectors.
* The current Connector struct size is 32 bytes, and the intention is
Expand Down Expand Up @@ -873,7 +867,7 @@ static Tracon_sharing *pack_sentence_init(Sentence sent, bool is_pruning)
ts->dblock = dblock;
ts->num_connectors = ccnt;
ts->num_disjuncts = dcnt;
ts->word_offset = is_pruning ? 1 : WORD_OFFSET;
ts->word_offset = is_pruning ? 1 : NULL_TRACON_BLOCK;
ts->is_pruning = is_pruning;
ts->next_id[0] = ts->next_id[1] = ts->word_offset;
ts->last_token = (uintptr_t)-1;
Expand Down
127 changes: 73 additions & 54 deletions link-grammar/parse/extract-links.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,19 @@ struct Parse_choice_struct
Parse_choice * next;
Parse_set * set[2];
Disjunct *md; /* the chosen disjunct for the middle word */
Connector *lc, *rc; /* the connectors on the middle word */
int32_t l_id, r_id; /* the tracon IDs used in this disjunct */
};

/* Parse_set serves as a header of Parse_choice chained elements, that
* describe the possible parses with the specified null_count, using
* tracons l_id and r_id on words lw and rw, correspondingly. */
struct Parse_set_struct
{
Parse_choice *first;
Connector *le, *re;
Parse_choice *first;
unsigned int num_pc; /* number of Parse_choice elements */
uint8_t lw, rw; /* left and right word index */
uint8_t null_count; /* number of island words */
int32_t l_id, r_id; /* tracons on words lw, rw */

count_t count; /* The number of ways to parse. */
#ifdef RECOUNT
Expand Down Expand Up @@ -105,13 +105,12 @@ make_choice(Parse_set *lset, Connector * lrc,
Parse_set *rset, Connector * rlc,
Disjunct *md, extractor_t* pex)
{
Parse_choice *pc;
pc = pool_alloc(pex->Parse_choice_pool);
Parse_choice *pc = pool_alloc(pex->Parse_choice_pool);
pc->next = NULL;
pc->set[0] = lset;
pc->set[1] = rset;
pc->lc = lrc;
pc->rc = rlc;
pc->l_id = (lrc == NULL) ? -1 : lrc->tracon_id;
pc->r_id = (rlc == NULL) ? -1 : rlc->tracon_id;
pc->md = md;
return pc;
}
Expand All @@ -126,6 +125,7 @@ static void record_choice(
// Chain it into the parse set.
pc->next = s->first;
s->first = pc;
s->num_pc++;
}

/**
Expand Down Expand Up @@ -201,8 +201,8 @@ extractor_t * extractor_new(Sentence sent)
pex->x_table_size = (1 << log2_table_size);

#ifdef DEBUG_X_TABLE
printf("Allocating x_table of size %u (nwords %d)\n",
pex->x_table_size, nwords);
printf("Allocating x_table of size %u (log2 %d)\n",
pex->x_table_size, log2_table_size);
#endif /* DEBUG_X_TABLE */

pex->x_table = (Pset_bucket**) xalloc(pex->x_table_size * sizeof(Pset_bucket*));
Expand Down Expand Up @@ -267,23 +267,40 @@ void free_extractor(extractor_t * pex)
xfree((void *) pex, sizeof(extractor_t));
}

/**
* Return a dummy connector that represents a null tracon for word \p w.
* Its purpose is to greatly simplify the condition in x_table_pointer().
* \p w may be in the range [-1,sentence length].
* We assume here is that an integer check and assignment is thread-safe.
*/
static Connector *dummy_null_tracon(int w)
{
/* +1 for w+1 (see below).
* +1 for invocations with w equal to MAX_SENTENCE. */
static Connector dnt[MAX_SENTENCE+1+1];

/* w+1 supports invocations with w==-1. */
if (dnt[w+1].tracon_id != w) dnt[w+1].tracon_id = w;
return &dnt[w+1];
}

/**
* Returns the pointer to this info, NULL if not there.
* Note that there is no need to use (lw, rw) as keys because tracon_id
* values are not shared between words.
*/
static Pset_bucket * x_table_pointer(int lw, int rw,
Connector *le, Connector *re,
unsigned int null_count, extractor_t * pex)
static Pset_bucket *x_table_pointer(int lw, int rw,
Connector *le, Connector *re,
unsigned int null_count, extractor_t * pex)
{
Pset_bucket *t;
int l_id = (NULL != le) ? le->tracon_id : lw;
int r_id = (NULL != re) ? re->tracon_id : rw;
unsigned int hash = pair_hash(lw, rw, l_id, r_id, null_count);
t = pex->x_table[hash & (pex->x_table_size-1)];
Pset_bucket *t = pex->x_table[hash & (pex->x_table_size-1)];

for (; t != NULL; t = t->next) {
if ((t->set.l_id == l_id) && (t->set.r_id == r_id) &&
for (; t != NULL; t = t->next)
{
if ((t->set.le->tracon_id == l_id) && (t->set.re->tracon_id == r_id) &&
(t->set.null_count == null_count)) return t;
}
return NULL;
Expand All @@ -296,22 +313,21 @@ static Pset_bucket * x_table_store(int lw, int rw,
Connector *le, Connector *re,
unsigned int null_count, extractor_t * pex)
{
Pset_bucket **t, *n;
unsigned int h;
int32_t l_id = (NULL != le) ? le->tracon_id : lw;
int32_t r_id = (NULL != re) ? re->tracon_id : rw;
unsigned int h = pair_hash(lw, rw, l_id, r_id, null_count);
Pset_bucket **t = &pex->x_table[h & (pex->x_table_size -1)];
Pset_bucket *n = pool_alloc(pex->Pset_bucket_pool);

n = pool_alloc(pex->Pset_bucket_pool);
n->set.lw = lw;
n->set.rw = rw;
n->set.null_count = null_count;
n->set.l_id = (NULL != le) ? le->tracon_id : lw;
n->set.r_id = (NULL != re) ? re->tracon_id : rw;
n->set.le = le;
n->set.re = re;
n->set.le = (NULL != le) ? le : dummy_null_tracon(lw);
n->set.re = (NULL != re) ? re : dummy_null_tracon(rw);
n->set.count = 0;
n->set.first = NULL;
n->set.num_pc = 0;

h = pair_hash(lw, rw, n->set.l_id, n->set.r_id, null_count);
t = &pex->x_table[h & (pex->x_table_size -1)];
n->next = *t;
*t = n;
return n;
Expand Down Expand Up @@ -716,19 +732,35 @@ bool build_parse_set(extractor_t* pex, Sentence sent,
return set_overflowed(pex);
}

/**
 * Find, on one side of disjunct \p d, the connector whose tracon_id
 * equals \p tracon_id.
 *
 * @param d         The disjunct whose connector chain is searched.
 * @param tracon_id The tracon ID to look for. A negative value is the
 *                  marker for "no connector" (see make_choice()).
 * @param dir       Non-zero: search d->right; zero: search d->left.
 * @return The matching connector, or NULL if \p tracon_id is negative.
 *
 * A non-negative \p tracon_id that is not found on the requested side
 * is treated as an internal error and triggers an assertion failure.
 */
static Connector *get_tracon_by_id(const Disjunct *d, int32_t tracon_id,
                                   int dir)
{
	if (tracon_id < 0) return NULL; /* See make_choice() */

	for (Connector *c = dir ? d->right : d->left; c != NULL; c = c->next)
	{
		if (tracon_id == c->tracon_id) return c;
	}

	/* %p requires a pointer to void, and %d requires an int. */
	assert(0, "tracon_id %d not found on disjunct %p in direction %d\n",
	       (int)tracon_id, (void *)d, dir);

	/* assert(0, ...) is expected not to return; this keeps the function
	 * well-defined (no fall-off-the-end) if it ever does. */
	return NULL;
}

/**
 * Tell whether \p c stands for a zero-length (null) tracon.
 *
 * This is the case when \p c is NULL, or when its tracon_id is below
 * NULL_TRACON_BLOCK.
 */
static bool is_zero_tracon(Connector *c)
{
	if (c == NULL)
	{
		return true;
	}

	return c->tracon_id < NULL_TRACON_BLOCK;
}

/**
* Assemble the link array and the chosen_disjuncts of a linkage.
*/
static void issue_link(Linkage lkg, int lr, Parse_choice *pc,
const Parse_set *set)
{
Connector *lc = lr ? pc->rc : set->le;
if (lc == NULL) return; /* No choice to record. */
Connector *lc = lr ? get_tracon_by_id(pc->md, pc->r_id, 1) : set->le;
if (is_zero_tracon(lc)) return; /* No choice to record. */

lkg->chosen_disjuncts[lr ? pc->set[1]->lw : pc->set[0]->rw] = pc->md;

Connector *rc = lr ? set->re : pc->lc;
if (rc == NULL) return; /* No link to generate. */
Connector *rc = lr ? set->re : get_tracon_by_id(pc->md, pc->l_id, 0);
if (is_zero_tracon(rc)) return; /* No choice to record. */

assert(lkg->num_links < lkg->lasz, "Linkage array too small!");
Link *link = &lkg->link_array[lkg->num_links];
Expand Down Expand Up @@ -792,12 +824,13 @@ static void issue_links_for_choice(Linkage lkg, Parse_choice *pc,
* For S0: (Nindex % pc->set[0]->count) ranges from 0 to (S0ₘ-1).
* For S1: (Nindex / pc->set[0]->count) ranges from 0 to (S1ₘ-1).
*/
static void list_links(Linkage lkg, const Parse_set * set, int index)
static void list_links(Linkage lkg, Parse_set * set, int index)
{
Parse_choice *pc;
count_t n; /* No overflow - see extract_links() and process_linkages() */

if (set == NULL || set->first == NULL) return;
assert(set != NULL, "Unexpected NULL Parse_set");
if (set->first == NULL) return;
for (pc = set->first; pc != NULL; pc = pc->next) {
n = pc->set[0]->count * pc->set[1]->count;
if (index < n) break;
Expand All @@ -810,32 +843,18 @@ static void list_links(Linkage lkg, const Parse_set * set, int index)
}

static void list_random_links(Linkage lkg, unsigned int *rand_state,
const Parse_set * set)
Parse_set * set)
{
Parse_choice *pc;
int num_pc, new_index;
assert(set != NULL, "Unexpected NULL Parse_set");
if (set->first == NULL) return;

if (set == NULL || set->first == NULL) return;
/* Avoid calling rand_r() for the common case of a single element. */
unsigned int new_index = (set->num_pc == 1) ? 0 :
rand_r(rand_state) % set->num_pc;

/* Most of the time, there is only one list element. */
if (set->first->next == NULL)
{
pc = set->first;
}
else
{
num_pc = 0;
for (pc = set->first; pc != NULL; pc = pc->next) {
num_pc++;
}

new_index = rand_r(rand_state) % num_pc;

num_pc = 0;
for (pc = set->first; new_index != num_pc; pc = pc->next) {
num_pc++;
}
}
Parse_choice *pc;
for (pc = set->first; new_index > 0; pc = pc->next)
new_index--;

issue_links_for_choice(lkg, pc, set);
list_random_links(lkg, rand_state, pc->set[0]);
Expand Down

0 comments on commit 8a290fd

Please sign in to comment.