Skip to content

Commit

Permalink
cdata: Intern individual words from 256-bitsets.
Browse files Browse the repository at this point in the history
Each state has two 256-bitsets, stored as a uint64_t[4], but the
individual words in those have a lot of duplication. Add a table
with every unique word, sorted descending by frequency, and replace
the per-state labels and label_group_starts arrays with an array of
offsets into the label_word table. Typically these offsets will
fit in a uint8_t (though the code generation will switch to
a uint16_t when necessary), making the per-state data much smaller.
The label_word table's most commonly used entries are all grouped
together and should stay in cache.
  • Loading branch information
silentbicycle committed Oct 29, 2024
1 parent d3d1c57 commit 5d44725
Showing 1 changed file with 133 additions and 15 deletions.
148 changes: 133 additions & 15 deletions src/libfsm/print/cdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ struct cdata_config {
enum id_type t_endid_value;
enum id_type t_eager_output_value;

/* numeric type for entries in .bitset_words.pairs[] */
enum id_type t_label_word_id;

struct dst_buf {
size_t ceil;
size_t used;
Expand Down Expand Up @@ -154,6 +157,17 @@ struct cdata_config {
size_t eager_output;
} *state_info;

/* Collected 64-bit word counts for the .labels and
 * .label_group_starts 256-bitsets. Each distinct word is stored
 * once in pairs[], together with the number of times it has been
 * recorded; an entry's index in pairs[] is the ID written out for
 * that word in the generated per-state word-ID arrays. */
struct bitset_words {
size_t used;	/* number of entries of pairs[] currently in use */
size_t ceil;	/* number of entries allocated for pairs[] */
struct bitset_word_pair {
uint64_t word;	/* one 64-bit word taken from a 256-bitset */
size_t count;	/* how many times this word has been recorded */
} *pairs;
} bitset_words;

#if LOG_REUSE
struct reuse_stats {
size_t miss;
Expand Down Expand Up @@ -183,11 +197,11 @@ generate_struct_definition(FILE *f, const struct cdata_config *config, bool comm
if (comments) {
fprintf(f,
"\t\t\t/* To find the destination state for label character C,\n"
"\t\t\t * check if the bit C is set in .labels[]. If so, find the\n"
"\t\t\t * the 1 bit at or preceding C in .label_group_starts[],\n"
"\t\t\t * which represents the start of the Nth label group, the\n"
"\t\t\t * group label group that contains C. The dst state will be in\n"
"\t\t\t * .dst_table[.dst_table_offset + N]. This offset N is called\n"
"\t\t\t * check if the bit C is set in the word id'd by .labels[].\n"
"\t\t\t * If so, find the the 1 bit at or preceding C in\n"
"\t\t\t * .label_group_starts[], which represents the start of the Nth\n"
"\t\t\t * label group, the group label group that contains C. The dst state will\n"
"\t\t\t * be in .dst_table[.dst_table_offset + N]. This offset N is called\n"
"\t\t\t * the rank, and .rank_sums has precomputed sums for each\n"
"\t\t\t * word preceding .label_group_starts[C/64]. If .labels[]\n"
"\t\t\t * isn't set for C, the destination is .default_dst, or the\n"
Expand All @@ -196,10 +210,13 @@ generate_struct_definition(FILE *f, const struct cdata_config *config, bool comm
}
fprintf(f,
"\t\t\t%s_cdata_state default_dst; /* or %zu for NONE */\n"
"\t\t\tuint64_t labels[256/4]; /* which labels have non-default edges */\n"
"\t\t\tuint64_t label_group_starts[256/4]; /* start of each label group */\n"
"\t\t\t%s label_word_ids[4]; /* which labels have non-default edges */\n"
"\t\t\t%s label_group_start_word_ids[4]; /* start of each label group */\n"
"\t\t\tuint8_t rank_sums[4]; /* rank at end of label_group_starts[n] */\n"
"\n", prefix, config->state_count);
"\n",
prefix, config->state_count,
id_type_str(config->t_label_word_id),
id_type_str(config->t_label_word_id));

if (comments) {
fprintf(f, "\t\t\t/* Offsets into values in other tables */\n");
Expand Down Expand Up @@ -229,6 +246,14 @@ generate_struct_definition(FILE *f, const struct cdata_config *config, bool comm
"\t\t} state_end_info[%zd];\n"
"\n", config->state_count);

if (comments) {
fprintf(f,
"\t\t/* Table of individual words used in label and label_group_start\n"
"\t\t * bitsets, in descending order by frequency. */\n");
}
fprintf(f,
"\t\tuint64_t label_word_table[%zd];\n", config->bitset_words.used);

if (comments) {
fprintf(f,
"\t\t/* Destination states for each edge group in each state,\n"
Expand Down Expand Up @@ -267,6 +292,22 @@ generate_struct_definition(FILE *f, const struct cdata_config *config, bool comm
return true;
}

/* Translate the four 64-bit words of a 256-bitset into their indices
 * in config->bitset_words.pairs[].
 *
 * labels: the four words of the bitset.
 * ids:    receives, for each word, the index of the first table entry
 *         holding that word.
 *
 * Every word is expected to be present in the table already; a missing
 * word trips the assert (and leaves the corresponding ids[] slot
 * unwritten when asserts are compiled out). */
static void
lookup_label_ids(const struct cdata_config *config, const uint64_t *labels, unsigned ids[4]) {
	const size_t word_count = config->bitset_words.used;

	for (size_t slot = 0; slot < 4; slot++) {
		const uint64_t wanted = labels[slot];

		/* Linear scan for the first entry matching this word. */
		size_t pos = 0;
		while (pos < word_count
		    && config->bitset_words.pairs[pos].word != wanted) {
			pos++;
		}

		assert(pos < word_count);
		if (pos < word_count) {
			ids[slot] = pos;
		}
	}
}

static bool
generate_data(FILE *f, const struct cdata_config *config,
bool comments, const char *prefix, const struct ir *ir)
Expand Down Expand Up @@ -295,8 +336,10 @@ generate_data(FILE *f, const struct cdata_config *config,
}
fprintf(f, "\n");
}
fprintf(f, "\t\t\t\t.labels = { 0x%lx, 0x%lx, 0x%lx, 0x%lx },\n",
si->labels[0], si->labels[1], si->labels[2], si->labels[3]);
unsigned label_ids[4];
lookup_label_ids(config, si->labels, label_ids);
fprintf(f, "\t\t\t\t.label_word_ids = { %u, %u, %u, %u },\n",
label_ids[0], label_ids[1], label_ids[2], label_ids[3]);

size_t dst_count = 0;
if (comments) {
Expand All @@ -311,8 +354,9 @@ generate_data(FILE *f, const struct cdata_config *config,
}
fprintf(f, "\n");
}
fprintf(f, "\t\t\t\t.label_group_starts = { 0x%lx, 0x%lx, 0x%lx, 0x%lx },\n",
si->label_group_starts[0], si->label_group_starts[1], si->label_group_starts[2], si->label_group_starts[3]);
lookup_label_ids(config, si->label_group_starts, label_ids);
fprintf(f, "\t\t\t\t.label_group_start_word_ids = { %u, %u, %u, %u },\n",
label_ids[0], label_ids[1], label_ids[2], label_ids[3]);

/* rank_sums[0] is always 0, but allows us to avoid a subtraction in the inner loop,
* and the space would be wasted otherwise anyway due to alignment. */
Expand Down Expand Up @@ -395,6 +439,20 @@ generate_data(FILE *f, const struct cdata_config *config,
}
fprintf(f, "\t\t},\n");

fprintf(f,
"\t\t.label_word_table = {\n\t\t\t");
for (size_t i = 0; i < config->bitset_words.used; i++) {
fprintf(f, " 0x%016lx,", config->bitset_words.pairs[i].word);
if ((i & 3) == 3) {
fprintf(f, "\n\t\t\t");
}
}
if ((config->bitset_words.used & 3) != 3) {
fprintf(f, "\n");
}

fprintf(f, "\t\t},\n");

fprintf(f,
"\t\t.dst_table = {");

Expand Down Expand Up @@ -554,11 +612,14 @@ generate_interpreter(FILE *f, const struct cdata_config *config, const struct fs
"\t\tconst size_t w_i = c/64;\n"
"\t\tconst size_t word_rem = c & 63;\n"
"\t\tconst uint64_t bit = (uint64_t)1 << word_rem;\n"
"\t\tif (state->labels[w_i] & bit) { /* if state has label */\n"
"\t\tconst %s label_word_id = state->label_word_ids[w_i];\n"
"\t\tconst uint64_t label_word = %s_dfa_data.label_word_table[label_word_id];\n"
"\t\tif (label_word & bit) { /* if state has label */\n"
"\t\t\tif (debug_traces) {\n"
"\t\t\t\tfprintf(stderr, \"-- label '%%c' (0x%%02x) -> w_i %%zd, bit 0x%%016lx\\n\", isprint(c) ? c : 'c', c, w_i, bit);\n"
"\t\t\t}\n"
"\t\t\tconst uint64_t lgs_word = state->label_group_starts[w_i];\n"
"\t\t\tconst uint64_t lgs_word_id = state->label_group_start_word_ids[w_i];\n"
"\t\t\tconst uint64_t lgs_word = %s_dfa_data.label_word_table[lgs_word_id];\n"
"\t\t\tconst size_t back = (lgs_word & bit) ? 0 : 1; /* back to start of label group */\n"
"\t\t\tconst uint64_t mask = bit - 1;\n"
"\t\t\tconst uint64_t masked_word = lgs_word & mask;\n"
Expand All @@ -583,7 +644,8 @@ generate_interpreter(FILE *f, const struct cdata_config *config, const struct fs
"\t\t\treturn 0; /* no match */\n"
"\t\t}\n"
"\t}\n",
popcount, prefix, prefix);
id_type_str(config->t_label_word_id),
prefix, prefix, popcount, prefix, prefix);

/* At the end of the input, check if the current state is an end.
* If not, there's no match. */
Expand Down Expand Up @@ -898,6 +960,42 @@ append_dst(const struct fsm_alloc *alloc, struct dst_buf *buf, uint32_t dst)
return true;
}

/* qsort() comparator for struct bitset_word_pair.
 *
 * Primary key: count, descending, so the most frequently used words
 * sort to the front of the table.
 * Secondary key: word, ascending. qsort() leaves the relative order of
 * elements that compare equal unspecified, so without a tie-break the
 * position of equally frequent words -- and therefore the IDs derived
 * from those positions -- could differ between qsort implementations.
 *
 * Returns <0, 0 or >0 as *pa sorts before, equal to, or after *pb. */
static int
cmp_bitset_word_pair(const void *pa, const void *pb)
{
	const struct bitset_word_pair *a = pa;
	const struct bitset_word_pair *b = pb;

	if (a->count != b->count) {
		/* higher count sorts first */
		return a->count < b->count ? 1 : -1;
	}

	if (a->word != b->word) {
		/* equal counts: lower word value sorts first */
		return a->word < b->word ? -1 : 1;
	}

	return 0;
}

/* Record one occurrence of the 64-bit word w in bws.
 *
 * If w already has an entry its count is incremented; otherwise a new
 * entry with a count of 1 is appended. When the table is full it is
 * grown with f_realloc() first: to 8 entries if its capacity was 0,
 * otherwise to double the current capacity.
 *
 * NOTE(review): an allocation failure is only caught by assert(). */
static void
increment_bitset_word_count(const struct fsm_alloc *alloc, struct bitset_words *bws, uint64_t w)
{
	/* The table tends to stay fairly small, so a linear scan is
	 * probably good enough. */
	size_t pos = 0;
	while (pos < bws->used) {
		if (bws->pairs[pos].word == w) {
			bws->pairs[pos].count++;
			return;
		}
		pos++;
	}

	/* First time this word is seen: make room if necessary. */
	if (bws->used == bws->ceil) {
		size_t grown = 8;
		if (bws->ceil != 0) {
			grown = 2*bws->ceil;
		}

		struct bitset_word_pair *resized = f_realloc(alloc,
		    bws->pairs, grown * sizeof(resized[0]));
		assert(resized != NULL);
		bws->pairs = resized;
		bws->ceil = grown;
	}

	bws->pairs[bws->used].word = w;
	bws->pairs[bws->used].count = 1;
	bws->used++;
}

static bool
save_state_edge_group_destinations(struct cdata_config *config, struct state_info *si,
size_t group_count, const struct ir_group *groups)
Expand Down Expand Up @@ -966,6 +1064,12 @@ save_state_edge_group_destinations(struct cdata_config *config, struct state_inf
}
}

struct bitset_words *bws = &config->bitset_words;
for (size_t i = 0; i < 4; i++) {
increment_bitset_word_count(config->alloc, bws, si->labels[i]);
increment_bitset_word_count(config->alloc, bws, si->label_group_starts[i]);
}

/* Precompute label_group_starts[] rank sums so lookup only needs to
* compute rank for the label's word, not every word preceding it. */
si->rank_sums[0] = 0;
Expand Down Expand Up @@ -1058,6 +1162,14 @@ populate_config_from_ir(struct cdata_config *config, const struct fsm_alloc *all
si->eager_output = STATE_OFFSET_NONE;
}

/* add a single entry for 0, in case the IR only has a single IR_NONE or IR_SAME state */
config->bitset_words.ceil = 1;
config->bitset_words.used = 1;
config->bitset_words.pairs = calloc(1, sizeof(config->bitset_words.pairs[0]));
assert(config->bitset_words.pairs != NULL);
config->bitset_words.pairs[0].word = 0x0;
config->bitset_words.pairs[0].count = 1;

for (size_t s_i = 0; s_i < ir->n; s_i++) {
const struct ir_state *s = &ir->states[s_i];

Expand Down Expand Up @@ -1124,6 +1236,12 @@ populate_config_from_ir(struct cdata_config *config, const struct fsm_alloc *all
* a pointer to the array of IDs. */
config->t_endid_value = UNSIGNED; //size_needed(config->max_endid);

/* Sort by use frequency, descending, so the most frequently used
* bitset words will stay in cache. */
qsort(config->bitset_words.pairs, config->bitset_words.used,
sizeof(config->bitset_words.pairs[0]), cmp_bitset_word_pair);
config->t_label_word_id = size_needed(config->bitset_words.used);

config->t_eager_output_value = size_needed(config->max_eager_output_id);
return true;

Expand Down

0 comments on commit 5d44725

Please sign in to comment.