Skip to content

Commit

Permalink
Merge pull request katef#479 from katef/kate/endleaf-rejiggle
Browse files Browse the repository at this point in the history
Endleaf rejiggle, .fsm syntax for endids, refactor endid get/set api, various related bugfixes
  • Loading branch information
katef authored Jun 18, 2024
2 parents 2483417 + f663cd4 commit 0402f39
Show file tree
Hide file tree
Showing 108 changed files with 2,417 additions and 1,387 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ SUBDIR += tests/subtract
SUBDIR += tests/determinise
SUBDIR += tests/endids
SUBDIR += tests/epsilons
SUBDIR += tests/fsm
SUBDIR += tests/glob
SUBDIR += tests/like
SUBDIR += tests/literal
Expand Down
38 changes: 17 additions & 21 deletions include/fsm/fsm.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,6 @@ typedef unsigned int fsm_end_id_t;

#define FSM_END_ID_MAX UINT_MAX

/* struct used to return a collection of end IDs. */
struct fsm_end_ids {
/* presumably the number of entries available in ids[] -- TODO confirm
 * against the code that fills this struct in */
unsigned count;
/* NOTE(review): declared with a single element; it looks like storage
 * for further IDs is meant to follow the struct -- confirm at the
 * allocation site before relying on this */
fsm_end_id_t ids[1];
};

/*
* Create a new FSM. This is to be freed with fsm_free(). A structure allocated
* from fsm_new() is expected to be passed as the "fsm" argument to the
Expand Down Expand Up @@ -222,29 +216,31 @@ fsm_setendid(struct fsm *fsm, fsm_end_id_t id);
* Returns 1 on success, 0 on error.
* */
int
fsm_setendidstate(struct fsm *fsm, fsm_state_t end_state, fsm_end_id_t id);
fsm_endid_set(struct fsm *fsm, fsm_state_t end_state, fsm_end_id_t id);

/* Get the end IDs associated with an end state, if any.
* If id_buf has enough cells to store all the end IDs (according
* to id_buf_count) then they are written into id_buf[] and
* *ids_written is set to the number of IDs. The end IDs in the
* buffer may appear in any order, but should not have duplicates.
* id_buf is expected to have enough cells (according to id_buf_count)
* to store all the end IDs. You can find this with fsm_endid_count().
*
* The end IDs in the buffer may appear in any order,
* but will not have duplicates.
*
* A state with no end IDs set is considered equivalent to a state
* that has the empty set; this API does not distinguish these cases.
* This is not an error.
*
* It is an error to attempt to get end IDs associated with a state
* that is not marked as an end state.
*
* Returns 0 if there is not enough space in id_buf for the
* end IDs, or 1 if zero or more end IDs were returned. */
/* Result type returned by fsm_getendids(). */
enum fsm_getendids_res {
FSM_GETENDIDS_NOT_FOUND, /* implicitly 0 */
FSM_GETENDIDS_FOUND, /* implicitly 1 */
FSM_GETENDIDS_ERROR_INSUFFICIENT_SPACE = -1 /* the only negative value */
};
enum fsm_getendids_res
fsm_getendids(const struct fsm *fsm, fsm_state_t end_state,
size_t id_buf_count, fsm_end_id_t *id_buf,
size_t *ids_written);
int
fsm_endid_get(const struct fsm *fsm, fsm_state_t end_state,
size_t id_buf_count, fsm_end_id_t *id_buf);

/* Get the number of end IDs associated with an end state. */
size_t
fsm_getendidcount(const struct fsm *fsm, fsm_state_t end_state);
fsm_endid_count(const struct fsm *fsm, fsm_state_t end_state);

/* Callback function to remap the end ids of a state. This function can
* remap to fewer end ids, but cannot add additional end ids, and cannot
Expand Down
7 changes: 5 additions & 2 deletions include/fsm/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,15 @@ struct fsm_options {
const char *cp;

/* TODO: explain. for C code fragment output */
int (*leaf)(FILE *, const struct fsm_end_ids *ids,
int (*leaf)(FILE *, const fsm_end_id_t *ids, size_t count,
const void *leaf_opaque);
void *leaf_opaque;

/* TODO: explain. for C code fragment output */
int (*endleaf)(FILE *, const struct fsm_end_ids *ids,
/* Placement in the output stream depends on the format.
* This replaces an entire "return xyz;" statement for C-like formats,
* but appends extra information for others. */
int (*endleaf)(FILE *, const fsm_end_id_t *ids, size_t count,
const void *endleaf_opaque);
void *endleaf_opaque;

Expand Down
25 changes: 17 additions & 8 deletions src/fsm/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -477,17 +477,14 @@ main(int argc, char *argv[])
struct fsm *q;

if ((op & OP_ARITY) == 1) {
if (argc > 1) {
usage();
exit(EXIT_FAILURE);
}
/* argc < 1 is okay */

q = fsm_parse((argc == 0) ? stdin : xopen(argv[0]), &opt);
if (q == NULL) {
exit(EXIT_FAILURE);
}
} else {
if (argc != 2) {
if (argc < 2) {
usage();
exit(EXIT_FAILURE);
}
Expand Down Expand Up @@ -610,6 +607,17 @@ main(int argc, char *argv[])
printf("=> total %g ms (avg %g ms)\n", elapsed, elapsed / iterations);
}

/* we're done consuming filenames, remaining argv is text to match */
if ((op & OP_ARITY) == 1) {
if (argc > 0) {
argc -= 1;
argv += 1;
}
} else {
argc -= 2;
argv += 2;
}

/* henceforth, r is $?-convention (0 for success) */

if (fsm == NULL) {
Expand Down Expand Up @@ -661,14 +669,15 @@ main(int argc, char *argv[])
}
}

/* TODO: optional -- to delimit texts as opposed to .fsm filenames */
if (op == OP_IDENTITY && argc > 0) {
/* match text */
if (argc > 0) {
int i;

/* TODO: option to print input texts which match, like grep(1) does.
* This is not the same as printing patterns which match (by associating
* a pattern to the end state), like lx(1) does */

/* TODO: optional -- to delimit texts as opposed to .fsm filenames */
for (i = 0; i < argc; i++) {
fsm_state_t state;
int e;
Expand All @@ -694,7 +703,7 @@ main(int argc, char *argv[])
continue;
}

/* TODO: option to print state number? */
/* TODO: option to print matching end-ids */
}
}

Expand Down
4 changes: 1 addition & 3 deletions src/libfsm/clone.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,15 +127,13 @@ static int
copy_end_ids_cb(fsm_state_t state, const fsm_end_id_t id, void *opaque)
{
struct copy_end_ids_env *env = opaque;
enum fsm_endid_set_res sres;
assert(env->tag == 'c');

#if LOG_CLONE_ENDIDS
fprintf(stderr, "clone[%d] <- %d\n", state, id);
#endif

sres = fsm_endid_set(env->dst, state, id);
if (sres == FSM_ENDID_SET_ERROR_ALLOC_FAIL) {
if (!fsm_endid_set(env->dst, state, id)) {
env->ok = 0;
return 0;
}
Expand Down
9 changes: 2 additions & 7 deletions src/libfsm/consolidate.c
Original file line number Diff line number Diff line change
Expand Up @@ -231,19 +231,14 @@ static int
consolidate_end_ids_cb(fsm_state_t state, const fsm_end_id_t *ids, size_t num_ids, void *opaque)
{
/*
 * Callback receiving the num_ids end IDs in ids[] for one source state.
 * The state is translated through env->mapping[] and the IDs are passed
 * to fsm_endid_set_bulk() for the translated state in env->dst, using
 * FSM_ENDID_BULK_APPEND.
 *
 * NOTE(review): this listing is a rendered diff hunk without +/- markers
 * (2 additions, 7 deletions), so both the removed sres-based form and the
 * added direct-return form appear below. Only one of them exists in any
 * real revision -- confirm against the repository before editing.
 */
struct consolidate_end_ids_env *env = opaque;
enum fsm_endid_set_res sres;
fsm_state_t s;
/* presumably a type tag guarding against a mismatched opaque pointer */
assert(env->tag == 'C');

/* mapping[] is indexed by source state; check bounds before the lookup */
assert(state < env->mapping_count);
s = env->mapping[state];

/* sres form: allocation failure becomes 0, anything else becomes 1 */
sres = fsm_endid_set_bulk(env->dst, s, num_ids, ids, FSM_ENDID_BULK_APPEND);
if (sres == FSM_ENDID_SET_ERROR_ALLOC_FAIL) {
return 0;
}

return 1;
/* direct-return form: the result of fsm_endid_set_bulk() is returned as-is */
return fsm_endid_set_bulk(env->dst, s,
num_ids, ids, FSM_ENDID_BULK_APPEND);
}

static int
Expand Down
Loading

0 comments on commit 0402f39

Please sign in to comment.