Skip to content

Commit

Permalink
Merge branch 'sv/determine-which-characters-must-appear-in-input-to-m…
Browse files Browse the repository at this point in the history
…atch--remove-struct-bm' into sv/tmp-integration-branch-to-vendor-for-da2lx-subside-go-or-no-go
  • Loading branch information
silentbicycle committed Sep 13, 2024
2 parents 521789f + abd09a5 commit 69f3bd7
Show file tree
Hide file tree
Showing 12 changed files with 582 additions and 0 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ SUBDIR += tests/intersect
SUBDIR += tests/eclosure
SUBDIR += tests/equals
SUBDIR += tests/subtract
SUBDIR += tests/detect_required
SUBDIR += tests/determinise
SUBDIR += tests/endids
SUBDIR += tests/epsilons
Expand Down
18 changes: 18 additions & 0 deletions include/adt/bitmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#define ADT_BITMAP_H

#include <stdint.h>
#include <limits.h>

#include <stdio.h>
#include "print/esc.h"

struct fsm_state;
Expand All @@ -23,6 +26,9 @@ bm_get(const struct bm *bm, size_t i);
void
bm_set(struct bm *bm, size_t i);

/* Clear the bit at index i in the bitmap (the counterpart of bm_set).
 * The definition asserts that bm is non-NULL and that i <= UCHAR_MAX. */
void
bm_unset(struct bm *bm, size_t i);

/* Get a writeable pointer to the Nth word of the char set bitmap,
* or NULL if out of bounds. */
uint64_t *
Expand Down Expand Up @@ -51,5 +57,17 @@ bm_snprint(const struct bm *bm, const struct fsm_options *opt,
int boxed,
escputc *escputc);

/* Overwrite dst with a copy of the whole bitmap in src. */
void
bm_copy(struct bm *dst, const struct bm *src);

/* In-place intersection: afterwards dst holds only the bits that were
 * set in both dst and src (dst &= src, word by word). src is not
 * modified. */
void
bm_intersect(struct bm *dst, const struct bm *src);

/* In-place union: afterwards dst holds every bit that was set in
 * either dst or src (dst |= src, word by word). src is not modified. */
void
bm_union(struct bm *dst, const struct bm *src);

/* Return 1 if any word of the bitmap is nonzero (i.e. at least one bit
 * is set), otherwise 0. */
int
bm_any(const struct bm *bm);

#endif

27 changes: 27 additions & 0 deletions include/fsm/walk.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#ifndef FSM_WALK_H
#define FSM_WALK_H

#include <stdint.h>

struct fsm;
struct fsm_state;

Expand Down Expand Up @@ -128,5 +130,30 @@ fsm_generate_matches_cb fsm_generate_cb_printf;
* to escape all characters or just nonprintable ones. */
fsm_generate_matches_cb fsm_generate_cb_printf_escaped;

/* Walk a DFA and detect which characters MUST appear in the input for a
* match to be possible. For example, if input for the DFA corresponding
* to /^(abc|dbe)$/ does not contain 'b' at all, there's no way it can
* ever match, so executing the regex is unnecessary. This does not detect
* which characters must appear before/after others or how many times, just
* which must be present.
*
* The input must be a DFA. When run with EXPENSIVE_CHECKS this will
* check and return ERROR_MISUSE if it is not, otherwise this is an
* unchecked error.
*
* There is an optional step_limit -- if this is reached, then it will
* return FSM_DETECT_REQUIRED_CHARACTERS_STEP_LIMIT_REACHED and a
* cleared bitmap, because any partial information could still have been
* contradicted later. If the step_limit is 0 it will be ignored. */
enum fsm_detect_required_characters_res {
	/* Non-negative values: the call ran without error. */

	/* NOTE(review): presumably means the result bitmap was filled in;
	 * confirm against the implementation. */
	FSM_DETECT_REQUIRED_CHARACTERS_WRITTEN = 0,

	/* The optional step_limit was hit; the bitmap is returned cleared,
	 * since partial information could still have been contradicted. */
	FSM_DETECT_REQUIRED_CHARACTERS_STEP_LIMIT_REACHED = 1,

	/* Negative values: errors. */

	/* The input was not a DFA (only detected with EXPENSIVE_CHECKS). */
	FSM_DETECT_REQUIRED_CHARACTERS_ERROR_MISUSE = -1,

	/* NOTE(review): presumably an allocation failure, going by the
	 * name; confirm against the implementation. */
	FSM_DETECT_REQUIRED_CHARACTERS_ERROR_ALLOC = -2,
};
/* Parameters, as far as this header shows:
 *  - dfa: the automaton to analyse; must be a DFA (see the comment
 *    above the result enum).
 *  - step_limit: optional bound on the amount of work; 0 disables it.
 *  - bitmap: declared as four 64-bit words (256 bits). As a parameter
 *    this is a plain pointer, so the caller must supply at least four
 *    words. NOTE(review): presumably one bit per byte value, set for
 *    each required character -- confirm against the implementation.
 *  - char_count: NOTE(review): presumably receives the number of
 *    required characters; confirm, including whether NULL is accepted. */
enum fsm_detect_required_characters_res
fsm_detect_required_characters(const struct fsm *dfa, size_t step_limit,
uint64_t bitmap[4], size_t *char_count);

#endif

41 changes: 41 additions & 0 deletions src/adt/bitmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <limits.h>
#include <ctype.h>
#include <stdint.h>
#include <string.h>

#include <adt/bitmap.h>
#include <adt/u64bitset.h>
Expand All @@ -34,6 +35,15 @@ bm_set(struct bm *bm, size_t i)
u64bitset_set(bm->map, i);
}

/* Clear the bit at index i in the bitmap.
 *
 * bm must be non-NULL and i must be at most UCHAR_MAX; both are
 * checked with assert only, so release builds do not validate them. */
void
bm_unset(struct bm *bm, size_t i)
{
assert(bm != NULL);
assert(i <= UCHAR_MAX);

/* Delegate the actual bit manipulation to the u64bitset helper. */
u64bitset_clear(bm->map, i);
}

uint64_t *
bm_nth_word(struct bm *bm, size_t n)
{
Expand Down Expand Up @@ -325,3 +335,34 @@ bm_snprint(const struct bm *bm, const struct fsm_options *opt,

return -1;
}

void
bm_copy(struct bm *dst, const struct bm *src)
{
memcpy(dst, src, sizeof(*src));
}

/* In-place intersection: clear every bit in dst that is not also set
 * in src. src is left untouched. */
void
bm_intersect(struct bm *dst, const struct bm *src)
{
	const size_t word_count = sizeof(src->map) / sizeof(src->map[0]);
	size_t w = 0;

	while (w < word_count) {
		dst->map[w] = dst->map[w] & src->map[w];
		w++;
	}
}

/* In-place union: set in dst every bit that is set in src. src is
 * left untouched. */
void
bm_union(struct bm *dst, const struct bm *src)
{
	const size_t word_count = sizeof(src->map) / sizeof(src->map[0]);
	size_t w = 0;

	while (w < word_count) {
		dst->map[w] = dst->map[w] | src->map[w];
		w++;
	}
}

/* Report whether the bitmap has at least one bit set.
 *
 * Returns 1 if any word of the map is nonzero, otherwise 0. */
int
bm_any(const struct bm *bm)
{
	const size_t word_count = sizeof(bm->map) / sizeof(bm->map[0]);
	size_t w = 0;

	/* Skip leading all-zero words; stop at the first nonzero one. */
	while (w < word_count && bm->map[w] == 0) {
		w++;
	}

	/* Stopping before the end means a nonzero word was found. */
	return w < word_count;
}
1 change: 1 addition & 0 deletions src/libfsm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ SRC += src/libfsm/complete.c
SRC += src/libfsm/consolidate.c
SRC += src/libfsm/clone.c
SRC += src/libfsm/closure.c
SRC += src/libfsm/detect_required.c
SRC += src/libfsm/edge.c
SRC += src/libfsm/empty.c
SRC += src/libfsm/end.c
Expand Down
Loading

0 comments on commit 69f3bd7

Please sign in to comment.