From 92e6e827e2d69eb28110a26628b15d353ec498a6 Mon Sep 17 00:00:00 2001 From: Scott Vokes Date: Mon, 16 Sep 2024 16:00:21 -0400 Subject: [PATCH] Add re_is_anchored interface. --- include/re/re.h | 16 ++++++++++++++++ src/libre/libre.syms | 1 + src/libre/re.c | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/include/re/re.h b/include/re/re.h index 20408e98a..841e4e946 100644 --- a/include/re/re.h +++ b/include/re/re.h @@ -136,6 +136,22 @@ re_comp(enum re_dialect dialect, const struct fsm_alloc *alloc, enum re_flags flags, struct re_err *err); +/* Parse and analyze the regex just enough to determine whether it is + * anchored at the start and/or end. RE_IS_ANCHORED_ERROR is returned when + * the dialect is not recognised or no AST could be produced for the regex. + * Check for RE_IS_ANCHORED_ERROR first: it has the START and END bits set + * too. Any other result can be tested as a bitset of START and END. */ +enum re_is_anchored_res { + RE_IS_ANCHORED_NONE = 0x00, + RE_IS_ANCHORED_START = 0x01, + RE_IS_ANCHORED_END = 0x02, + RE_IS_ANCHORED_BOTH = 0x03, + RE_IS_ANCHORED_ERROR = 0xFFFF, +}; +enum re_is_anchored_res +re_is_anchored(enum re_dialect dialect, re_getchar_fun *f, void *opaque, + enum re_flags flags, struct re_err *err); + /* * Return a human-readable string describing a given error code. The string * returned has static storage, and must not be freed. diff --git a/src/libre/libre.syms b/src/libre/libre.syms index a4f1a223b..9d381cb0f 100644 --- a/src/libre/libre.syms +++ b/src/libre/libre.syms @@ -3,6 +3,7 @@ re_is_literal re_flags re_strerror re_perror +re_is_anchored ast_print ast_print_dot diff --git a/src/libre/re.c b/src/libre/re.c index 15af848b5..013e2b58c 100644 --- a/src/libre/re.c +++ b/src/libre/re.c @@ -335,3 +335,40 @@ re_is_literal(enum re_dialect dialect, int (*getc)(void *opaque), void *opaque, return -1; } +enum re_is_anchored_res +re_is_anchored(enum re_dialect dialect, re_getchar_fun *getc, void *opaque, + enum re_flags flags, struct re_err *err) +{ + /* FIXME: copy/pasted from re_is_literal above; factor out common code later. NOTE(review): unsatisfiable is passed to re_parse but never read here -- confirm that is intended.
*/ + + struct ast *ast; + const struct dialect *m; + int unsatisfiable; + + assert(getc != NULL); + + m = re_dialect(dialect); + if (m == NULL) { + if (err != NULL) { err->e = RE_EBADDIALECT; } + return RE_IS_ANCHORED_ERROR; + } + + flags |= m->flags; + + ast = re_parse(dialect, getc, opaque, flags, err, &unsatisfiable); + if (ast == NULL) { + return RE_IS_ANCHORED_ERROR; + } + + /* Map the AST_FLAG_ANCHORED_* bits on ast->expr onto NONE, START, END, or BOTH. */ + enum re_is_anchored_res res = RE_IS_ANCHORED_NONE; + if (ast->expr->flags & AST_FLAG_ANCHORED_START) { + res |= RE_IS_ANCHORED_START; + } + if (ast->expr->flags & AST_FLAG_ANCHORED_END) { + res |= RE_IS_ANCHORED_END; + } + + ast_free(ast); + return res; +}