diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 429ec2523..3315fc025 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -374,7 +374,7 @@ jobs: cc: [ clang ] make: [ bmake ] debug: [ DEBUG, RELEASE ] # RELEASE=1 is a no-op - mode: [ m, p, d ] + mode: [ r, s, m, i, M, p ] exclude: - os: macos cc: gcc # it's clang anyway @@ -409,8 +409,15 @@ jobs: path: ${{ env.build }} key: build-${{ matrix.make }}-${{ matrix.os }}-${{ matrix.cc }}-${{ matrix.debug }}-${{ matrix.san }}-${{ github.sha }} - # note we do the fuzzing unconditionally; each run adds to the corpus + # note we do the fuzzing unconditionally; each run adds to the corpus. + # + # We only run fuzzing for PRs in the base repo, this prevents attempting + # to purge the seed cache from a PR syncing a forked repo, which fails + # due to a permissions error (I'm unsure why, I think PRs from clones can't + # purge a cache in CI presumably for security/DoS reasons). PRs from clones + # still run fuzzing, just from empty, and do not save their seeds. - name: Restore seeds (mode ${{ matrix.mode }}) + if: github.repository == 'katef/libfsm' uses: actions/cache/restore@v3 id: cache-seeds with: diff --git a/Makefile b/Makefile index 8d742883e..f1f4f1396 100644 --- a/Makefile +++ b/Makefile @@ -108,6 +108,7 @@ SUBDIR += src SUBDIR += tests/capture SUBDIR += tests/complement SUBDIR += tests/gen +SUBDIR += tests/idmap SUBDIR += tests/intersect #SUBDIR += tests/ir # XXX: fragile due to state numbering SUBDIR += tests/eclosure diff --git a/fuzz/Makefile b/fuzz/Makefile index 1d418cd97..2327a811b 100644 --- a/fuzz/Makefile +++ b/fuzz/Makefile @@ -7,6 +7,15 @@ ${BUILD}/fuzz/: ${BUILD} DIR += ${BUILD}/fuzz +# Uncomment to enable capture fuzzing using PCRE as a test oracle. 
+#PCRE_CMP=1 + +.if PCRE_CMP +PKG += libpcre2-8 +LFLAGS.fuzzer += ${LIBS.libpcre2-8} +CFLAGS.${SRC:Mfuzz/target.c} += -DCMP_PCRE=1 +.endif + .for src in ${SRC:Mfuzz/*.c} CFLAGS.${src} += -std=c99 .endfor @@ -15,7 +24,7 @@ CFLAGS.${src} += -std=c99 fuzz:: ${BUILD}/fuzz/fuzzer ${BUILD}/fuzz/fuzzer: mkdir - ${CC} -o $@ ${LFLAGS} ${.ALLSRC:M*.o} ${.ALLSRC:M*.a} + ${CC} -o $@ ${LFLAGS} ${LFLAGS.fuzzer} ${.ALLSRC:M*.o} ${.ALLSRC:M*.a} .for lib in ${LIB:Mlibfsm} ${LIB:Mlibre} ${BUILD}/fuzz/fuzzer: ${BUILD}/lib/${lib:R}.a diff --git a/fuzz/run_fuzzer b/fuzz/run_fuzzer index be8ba1d95..429ffa961 100755 --- a/fuzz/run_fuzzer +++ b/fuzz/run_fuzzer @@ -4,6 +4,8 @@ BUILD=../build FUZZER=${BUILD}/fuzz/fuzzer SEEDS=${BUILD}/fuzz/fuzzer_seeds +ARG=$1 + SECONDS=${SECONDS:-60} WORKERS=${WORKERS:-4} SEEDS=${SEEDS:-seeds} @@ -25,5 +27,9 @@ if [ ! -d "${SEEDS}" ]; then mkdir -p "${SEEDS}" fi -echo "\n==== ${FUZZER}" -${FUZZER} -jobs=${WORKERS} -workers=${WORKERS} -max_total_time=${SECONDS} ${SEEDS} +if [ -z "${ARG}" ]; then + echo "\n==== ${FUZZER}" + exec ${FUZZER} -jobs=${WORKERS} -workers=${WORKERS} -max_total_time=${SECONDS} ${SEEDS} +else + exec ${FUZZER} ${ARG} +fi \ No newline at end of file diff --git a/fuzz/target.c b/fuzz/target.c index 87086b929..736c2d889 100644 --- a/fuzz/target.c +++ b/fuzz/target.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -23,17 +24,47 @@ #include "../src/libfsm/minimise_test_oracle.h" +/* for fsm_capture_dump */ +/* FIXME: should this be a public interface? */ +#include "../src/libfsm/capture.h" + +/* Buffer for sanitized fuzzer input */ +#define MAX_FUZZER_DATA (64 * 1024) +static uint8_t data_buf[MAX_FUZZER_DATA + 1]; + +/* Should fuzzer harness code be built that compares behavior + * with PCRE? (Obviously, this depends on PCRE.) 
*/ +#ifndef CMP_PCRE +#define CMP_PCRE 0 +#endif + +#if CMP_PCRE +#define PCRE2_CODE_UNIT_WIDTH 8 +#include + +static int +compare_with_pcre(const char *pattern, struct fsm *fsm); +#endif + /* 10 seconds */ #define TIMEOUT_USEC (10ULL * 1000 * 1000) +/* for TRACK_TIMES and EXPENSIVE_CHECKS */ +#include "../src/libfsm/internal.h" + enum run_mode { - MODE_DEFAULT, + MODE_REGEX, + MODE_REGEX_SINGLE_ONLY, + MODE_REGEX_MULTI_ONLY, + MODE_IDEMPOTENT_DET_MIN, MODE_SHUFFLE_MINIMISE, MODE_ALL_PRINT_FUNCTIONS, }; +static size_t +get_env_config(size_t default_value, const char *env_var_name); -/* This stuff will already exist elsewhere once other branches are merged. */ +/* TODO: These could be moved to a common file for test utils. */ #if 1 static void time_get(struct timeval *tv) @@ -75,7 +106,110 @@ scanner_next(void *opaque) } #endif -static const struct fsm_options opt; +/* This is used to track allocation during each fuzzer + * run. Note that hwm is not reduced when memory is + * free'd or realloc'd, because the size info is not + * passed to those calls. */ +#define MB(X) ((size_t)X * 1000 * 1000) +#define FH_ALLOCATOR_HWM_LIMIT (MB(50)) +struct fh_allocator_stats { + size_t hwm; /* high water mark */ +}; + +static void +fh_memory_hwm_limit_hook(const char *caller_name) +{ + /* It doesn't really help to exit here because libfuzzer will + * still treat it as a failure, but at least we can print a + * message about hitting the allocator limit and exit so we + * don't need to spend time investigating timeouts or ooms + * that are due to obvious resource exhaustion. 
*/ + fprintf(stderr, "%s: hit FH_ALLOCATOR_HWM_LIMIT (%zu), exiting\n", + caller_name, FH_ALLOCATOR_HWM_LIMIT); + exit(EXIT_SUCCESS); +} + +static void +fh_free(void *opaque, void *p) +{ + (void)opaque; + free(p); +} + +static void * +fh_calloc(void *opaque, size_t n, size_t sz) +{ + struct fh_allocator_stats *stats = opaque; + stats->hwm += sz; + if (stats->hwm > FH_ALLOCATOR_HWM_LIMIT) { + fh_memory_hwm_limit_hook(__func__); + return NULL; + } + + (void)opaque; + return calloc(n, sz); +} + +static void * +fh_malloc(void *opaque, size_t sz) +{ + struct fh_allocator_stats *stats = opaque; + stats->hwm += sz; + if (stats->hwm > FH_ALLOCATOR_HWM_LIMIT) { + fh_memory_hwm_limit_hook(__func__); + return NULL; + } + + return malloc(sz); +} + +static void * +fh_realloc(void *opaque, void *p, size_t sz) +{ + struct fh_allocator_stats *stats = opaque; + stats->hwm += sz; + if (stats->hwm > FH_ALLOCATOR_HWM_LIMIT) { + fh_memory_hwm_limit_hook(__func__); + return NULL; + } + + return realloc(p, sz); +} + +static struct fh_allocator_stats allocator_stats; + +/* fuzzer harness allocators */ +static struct fsm_alloc custom_allocators = { + .free = fh_free, + .calloc = fh_calloc, + .malloc = fh_malloc, + .realloc = fh_realloc, + .opaque = &allocator_stats, +}; + +static const struct fsm_options fsm_options = { + .group_edges = 1, /* make output readable */ + .alloc = &custom_allocators, +}; + +static void +dump_pattern(const char *pattern) +{ + const size_t pattern_length = strlen(pattern); + fprintf(stderr, "-- Pattern: %zu bytes\n", pattern_length); + for (size_t i = 0; i < pattern_length; i++) { + fprintf(stderr, " %02x", (uint8_t)pattern[i]); + if ((i & 31) == 31) { fprintf(stderr, "\n"); } + } + if ((pattern_length & 31) != 31) { + fprintf(stderr, "\n"); + } + for (size_t i = 0; i < pattern_length; i++) { + fprintf(stderr, "%c", isprint(pattern[i]) ? 
pattern[i] : '.'); + if ((i & 63) == 63) { fprintf(stderr, "\n"); } + } + fprintf(stderr, "\n"); +} static struct fsm * build(const char *pattern) @@ -95,7 +229,7 @@ build(const char *pattern) }; time_get(&pre); - fsm = re_comp(RE_PCRE, scanner_next, &s, &opt, RE_MULTI, &err); + fsm = re_comp(RE_PCRE, scanner_next, &s, &fsm_options, RE_MULTI, &err); time_get(&post); delta_usec = time_diff_usec(&pre, &post); total_usec += delta_usec; @@ -122,8 +256,10 @@ build(const char *pattern) if (total_usec > TIMEOUT_USEC) { #ifndef EXPENSIVE_CHECKS + dump_pattern(pattern); assert(!"timeout"); #else + (void)dump_pattern; fprintf(stderr, "exiting zero due to timeout under EXPENSIVE_CHECKS\n"); exit(0); #endif @@ -132,6 +268,17 @@ build(const char *pattern) return fsm; } +static size_t +get_env_config(size_t default_value, const char *env_var_name) +{ + const char *s = getenv(env_var_name); + if (s == NULL) { + return default_value; + } else { + return strtoul(s, NULL, 10); + } +} + static int codegen(const struct fsm *fsm) { @@ -143,14 +290,46 @@ codegen(const struct fsm *fsm) } static int -build_and_codegen(const char *pattern) +build_and_check_single(const char *pattern) { + const int verbosity = get_env_config(0, "VERBOSITY"); + if (verbosity > 1) { + fprintf(stderr, "pattern: \"%s\"\n", pattern); + } + + INIT_TIMERS(); + TIME(&pre); struct fsm *fsm = build(pattern); if (fsm == NULL) { return EXIT_SUCCESS; } + TIME(&post); + DIFF_MSEC("build", pre, post, NULL); + + if (getenv("DUMP")) { + fprintf(stderr,"==================================================\n"); + fsm_print_fsm(stderr, fsm); + fprintf(stderr,"==================================================\n"); + fsm_capture_dump(stderr, "CAPTURE", fsm); + fprintf(stderr,"==================================================\n"); + } + +#if CMP_PCRE + TIME(&pre); + const int cmp_res = compare_with_pcre(pattern, fsm); + TIME(&post); + DIFF_MSEC("cmp", pre, post, NULL); + if (!cmp_res) { + fsm_free(fsm); + return EXIT_SUCCESS; + } 
+#endif - if (!codegen(fsm)) { + TIME(&pre); + const int codegen_res = codegen(fsm); + TIME(&post); + DIFF_MSEC("codegen", pre, post, NULL); + if (!codegen_res) { return EXIT_SUCCESS; } @@ -158,6 +337,1022 @@ build_and_codegen(const char *pattern) return EXIT_SUCCESS; } +#define DEF_MAX_DEPTH 20 +#define DEF_MAX_LENGTH 10 +#define DEF_MAX_STEPS 10000 +#define DEF_MAX_MATCH_COUNT 1000 + +#if CMP_PCRE +/* These two are only used with PCRE2 */ +#define ANCHORED_PCRE 0 +#define FUZZ_RE_MATCH_LIMIT 10000 +#define FUZZ_RE_RECURSION_LIMIT 200 +#define MAX_OVEC_SIZE 512 + +static pcre2_match_context *pcre2_mc = NULL; + +struct cmp_pcre_env { + int verbosity; + const char *pattern; + const struct fsm *fsm; + pcre2_match_data *md; + pcre2_code *p; + + struct fsm_capture *captures; + size_t captures_length; + + size_t max_depth; + size_t max_steps; + size_t max_match_count; +}; + +struct test_pcre_match_info { + int res; + int pcre_error; + size_t ovector[MAX_OVEC_SIZE]; +}; + +static pcre2_code * +build_pcre2(const char *pattern, int verbosity) +{ + const uint32_t options = ANCHORED_PCRE ? 
PCRE2_ANCHORED : 0; + int errorcode; + PCRE2_SIZE erroffset = 0; + pcre2_compile_context *cctx = NULL; + + /* Set match limits */ + if (pcre2_mc == NULL) { + pcre2_mc = pcre2_match_context_create(NULL); + assert(pcre2_mc != NULL); + + pcre2_set_match_limit(pcre2_mc, FUZZ_RE_MATCH_LIMIT); + pcre2_set_recursion_limit(pcre2_mc, FUZZ_RE_RECURSION_LIMIT); + } + + pcre2_code *p = pcre2_compile((const unsigned char *)pattern, + PCRE2_ZERO_TERMINATED, + options, &errorcode, &erroffset, cctx); + if (verbosity > 0 && p == NULL && errorcode != 0) { +#define ERRSIZE 4096 + unsigned char errbuf[ERRSIZE] = {0}; + if (!pcre2_get_error_message(errorcode, + errbuf, ERRSIZE)) { + fprintf(stderr, "pcre2_get_error_message: failed\n"); + } + fprintf(stderr, "pcre2_compile: error: %s\n", errbuf); + } + return p; +} + +enum do_pcre_match_res { + DO_PCRE_MATCH_HIT, + DO_PCRE_MATCH_MISS, + DO_PCRE_MATCH_SKIP, /* an exceptional case we don't care about */ + DO_PCRE_MATCH_ERROR = -1, +}; +enum do_pcre_match_res +do_pcre_match(FILE *f, const pcre2_code *p, pcre2_match_data *md, int verbosity, + const char *input, struct test_pcre_match_info *match_info) +{ +#define MAX_BUF (64*1024) + const size_t input_len = strlen(input); + enum do_pcre_match_res mres; + + /* turn off the JIT because it can give inconsistent results while fuzzing */ + const uint32_t options = (ANCHORED_PCRE ? PCRE2_ANCHORED : 0) + | PCRE2_NO_JIT; + + assert(pcre2_mc != NULL); + + /* The value returned by pcre2_match() is one more than the + * highest numbered pair that has been set. */ + int res = pcre2_match(p, (const unsigned char *)input, input_len, + 0, options, md, pcre2_mc); + + if (res == PCRE2_ERROR_NOMATCH || res == PCRE2_ERROR_PARTIAL) { + if (f != NULL && verbosity > 1) { + fprintf(f, " -- no match (%s)\n", + res == PCRE2_ERROR_NOMATCH ? "NOMATCH" + : res == PCRE2_ERROR_PARTIAL ? 
"PARTIAL" + : ""); + } + mres = DO_PCRE_MATCH_MISS; + goto cleanup; + } else if (res == PCRE2_ERROR_MATCHLIMIT || res == PCRE2_ERROR_DEPTHLIMIT) { + /* It's possible to exhaust PCRE's internal limits with pathologically + * nested regexes like "(((((((((^.)?)*)?)?)?)*)+)+)*$" and + * "((((((((akbzaabdcOaa)|((((b*))))?|.|.|.*|.|.))+)+)+$)*)?)" , but + * as long as they don't cause it to block for excessively long or + * exhaust resources that's fine. */ + if (f != NULL) { + fprintf(f, " -- PCRE2_ERROR_MATCHLIMIT (returning SKIP)\n"); + } + mres = DO_PCRE_MATCH_SKIP; + } else if (res <= 0) { + if (f != NULL) { +#define ERR_MAX 4096 + unsigned char err_buf[ERR_MAX]; + if (pcre2_get_error_message(res, err_buf, ERR_MAX)) { + fprintf(f, " -- error %d: %s\n", res, err_buf); + } else { + fprintf(f, " -- error %d\n", res); + } +#undef ERR_MAX + } + if (match_info != NULL) { + match_info->pcre_error = res; + } + mres = DO_PCRE_MATCH_ERROR; + goto cleanup; + } else { + const uint32_t ovc = pcre2_get_ovector_count(md); + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md); + assert(res >= 0); + size_t ures = (size_t)res; + assert(ovc > ures); + + assert(ovector[1] >= ovector[0]); + const size_t mlen = ovector[1] - ovector[0]; + if (ANCHORED_PCRE && (ovector[0] != 0 || mlen != input_len)) { + mres = DO_PCRE_MATCH_MISS; + goto cleanup; + } + mres = DO_PCRE_MATCH_HIT; + + if (f != NULL && verbosity > 1) { + for (size_t i = 0; i < ures; i++) { + char buf[MAX_BUF] = { 0 }; + memcpy(buf, &input[ovector[2*i]], + ovector[2*i + 1U] - ovector[2*i]); + fprintf(f, " -- %zu: \"%s\"\n", i, buf); + } + } + + if (match_info != NULL && res < MAX_OVEC_SIZE) { + match_info->res = res; + assert(res >= 0); + const size_t ures = (size_t)res; + + for (size_t i = 0; i < 2*ures; i++) { + match_info->ovector[i] = ovector[i]; + } + } + } + +cleanup: + return mres; +#undef MAX_BUF +} + +static bool +exec_and_compare_captures(struct cmp_pcre_env *env, + const char *input, size_t input_size, + const struct 
test_pcre_match_info *match_info) +{ + bool matching = true; + fsm_state_t end_state; + const uint8_t *u8_input = (const uint8_t *)input; + int res = fsm_exec_with_captures(env->fsm, u8_input, input_size, + &end_state, env->captures, env->captures_length); + + if (res < 0) { + if (env->verbosity > 1) { + fprintf(stderr, "got res of %d\n", res); + } + + return false; + } + + if (res > 0) { + assert(match_info->res >= 0); + const size_t ures = (size_t)match_info->res; + + if (env->verbosity > 1) { + fprintf(stderr, "ures %zu\n", ures); + } + + for (size_t i = 0; i < ures; i++) { + if (env->verbosity > 1) { + fprintf(stderr, "%zu/%zu: pcre [%ld, %ld] <-> libfsm [%ld, %ld]\n", + i, ures, + match_info->ovector[2*i], match_info->ovector[2*i + 1], + env->captures[i].pos[0], env->captures[i].pos[1]); + } + if ((match_info->ovector[2*i] != env->captures[i].pos[0]) + || (match_info->ovector[2*i + 1] != env->captures[i].pos[1])) { + matching = false; + } + } + + if (!matching) { + for (size_t i = 0; i < ures; i++) { + fprintf(stderr, "%zu/%zu: pcre [%ld, %ld] <-> libfsm [%ld, %ld]\n", + i, ures, + match_info->ovector[2*i], match_info->ovector[2*i + 1], + env->captures[i].pos[0], env->captures[i].pos[1]); + } + } + } + + return matching; +} + +static void +dump_pattern_and_input(const char *pattern, const char *input, size_t input_length) +{ + dump_pattern(pattern); + + fprintf(stderr, "-- Input: %zu bytes\n", input_length); + for (size_t i = 0; i < input_length; i++) { + fprintf(stderr, " %02x", (uint8_t)input[i]); + if ((i & 31) == 31) { fprintf(stderr, "\n"); } + } + if ((input_length & 31) != 31) { + fprintf(stderr, "\n"); + } + for (size_t i = 0; i < input_length; i++) { + fprintf(stderr, "%c", isprint(input[i]) ? 
input[i] : '.'); + if ((i & 63) == 63) { fprintf(stderr, "\n"); } + } + fprintf(stderr, "\n"); +} + +static enum fsm_generate_matches_cb_res +cmp_pcre_gen_cb(const struct fsm *fsm, + size_t depth, size_t match_count, size_t steps, + const char *input, size_t input_length, + fsm_state_t end_state, void *opaque) +{ + struct cmp_pcre_env *env = opaque; + assert(env != NULL); + + (void)fsm; + (void)depth; + (void)end_state; + + const size_t len = strlen(input); + + if (env->verbosity > 4) { + fprintf(stderr, "%s: depth %zu/%zu, match_count %zu/%zu, steps %zu/%zu\n", + __func__, + depth, env->max_depth, + match_count, env->max_match_count, + steps, env->max_steps); + } + + if (steps > env->max_steps) { + return FSM_GENERATE_MATCHES_CB_RES_HALT; + } + + if (match_count > env->max_match_count) { + return FSM_GENERATE_MATCHES_CB_RES_HALT; + } + + if (depth > env->max_depth) { + return FSM_GENERATE_MATCHES_CB_RES_PRUNE; + } + + /* Completely avoid exploring inputs with embedded 0x00 bytes. */ + if (input_length != len) { + return FSM_GENERATE_MATCHES_CB_RES_PRUNE; + } + + if (len > 0 && input[len - 1] == '\n') { + /* These will need to be handled properly, but PCRE has + * special cases for '\n' handling. 
*/ + /* fprintf(stderr, " -- skipping input ending with '\\n'.\n"); */ + return FSM_GENERATE_MATCHES_CB_RES_PRUNE; + } + + struct test_pcre_match_info match_info = { .pcre_error = 0 }; + enum do_pcre_match_res mres = do_pcre_match(stderr, + env->p, env->md, env->verbosity, input, &match_info); + switch (mres) { + case DO_PCRE_MATCH_SKIP: + break; + case DO_PCRE_MATCH_MISS: + dump_pattern_and_input(env->pattern, input, input_length); + assert(!"matches libfsm but not with PCRE"); + return FSM_GENERATE_MATCHES_CB_RES_HALT; + case DO_PCRE_MATCH_ERROR: + fprintf(stderr, "FAIL: PCRE returned ERROR %d: pattern \"%s\"\n", + match_info.pcre_error, env->pattern); + return FSM_GENERATE_MATCHES_CB_RES_HALT; + case DO_PCRE_MATCH_HIT: + break; /* okay; continue below */ + } + + if (env->verbosity > 1) { + fprintf(stderr, "-- comparing captures for pattern \"%s\", input \"%s\" (len %zu)\n", + env->pattern, input, len); + } + + if (!exec_and_compare_captures(env, input, input_length, &match_info)) { + if (env->verbosity > 1 || 1) { + dump_pattern_and_input(env->pattern, input, input_length); + fsm_print_fsm(stderr, env->fsm); + fsm_capture_dump(stderr, "fsm", env->fsm); + } + assert(!"captures don't match"); + } + + return FSM_GENERATE_MATCHES_CB_RES_CONTINUE; +} + +static int +compare_fixed_input(struct fsm *fsm, const char *pattern, const char *input, pcre2_match_data *md, pcre2_code *p) +{ + fsm_state_t end_state; + const size_t capture_ceil = fsm_capture_ceiling(fsm); + + struct fsm_capture *captures = malloc(capture_ceil * sizeof(captures[0])); + assert(captures != NULL); + for (size_t i = 0; i < capture_ceil; i++) { + /* clobber with meaningless but visually distinct values */ + captures[i].pos[0] = 88888888; + captures[i].pos[1] = 99999999; + }; + + const uint8_t *u8_input = (const uint8_t *)input; + const size_t input_len = strlen(input); + const int libfsm_res = fsm_exec_with_captures(fsm, u8_input, input_len, + &end_state, captures, capture_ceil); + + const bool 
libfsm_matching = libfsm_res > 0; + + int res = 1; + + struct test_pcre_match_info match_info = { .pcre_error = 0 }; + enum do_pcre_match_res mres = do_pcre_match(stderr, + p, md, 0, input, &match_info); + switch (mres) { + case DO_PCRE_MATCH_SKIP: + return 1; + case DO_PCRE_MATCH_MISS: + if (!libfsm_matching) { + goto cleanup; + } + dump_pattern_and_input(pattern, input, 0); + assert(!"matches libfsm but not with PCRE"); + return 0; + case DO_PCRE_MATCH_ERROR: + fprintf(stderr, "FAIL: PCRE returned ERROR %d: pattern \"%s\"\n", + match_info.pcre_error, pattern); + return 0; + case DO_PCRE_MATCH_HIT: + if (!libfsm_matching) { + dump_pattern_and_input(pattern, input, input_len); + assert(!"matches PCRE but not libfsm"); + res = 0; + goto cleanup; + } + + const size_t ures = (size_t)match_info.res; + if (ures > capture_ceil) { + dump_pattern_and_input(pattern, input, 0); + fprintf(stderr, "error: capture_ceil: %zu exceeded by ures: %zd\n", + capture_ceil, ures); + assert(!"both PCRE and libfsm match but with different capture counts"); + } + + bool matching = true; + for (size_t i = 0; i < ures; i++) { + if ((match_info.ovector[2*i] != captures[i].pos[0]) + || (match_info.ovector[2*i + 1] != captures[i].pos[1])) { + matching = false; + } + } + for (size_t i = 0; i < ures; i++) { + if (!matching) { + fprintf(stderr, "%zu/%zu: pcre [%ld, %ld] <-> libfsm [%ld, %ld]\n", + i, ures, + match_info.ovector[2*i], match_info.ovector[2*i + 1], + captures[i].pos[0], captures[i].pos[1]); + } + } + + if (!matching) { + dump_pattern_and_input(pattern, input, 0); + assert(!"both PCRE and libfsm match but with different captures"); + } + + goto cleanup; /* ok, both matched */ + } + + assert(!"unreachable"); + +cleanup: + free(captures); + return res; + +} + +static int +compare_with_pcre(const char *pattern, struct fsm *fsm) +{ + size_t verbosity = get_env_config(0, "VERBOSITY"); + size_t max_length = get_env_config(DEF_MAX_LENGTH, "MAX_LENGTH"); + size_t max_steps = 
get_env_config(DEF_MAX_STEPS, "MAX_STEPS"); + size_t max_depth = get_env_config(DEF_MAX_DEPTH, "MAX_DEPTH"); + size_t max_match_count = get_env_config(DEF_MAX_MATCH_COUNT, "MAX_MATCH_COUNT"); + int res = 1; + + pcre2_match_data *md; + + pcre2_code *p = build_pcre2(pattern, 0); + if (p == NULL) { + return 1; + } + + md = pcre2_match_data_create(MAX_OVEC_SIZE, NULL); + assert(md != NULL); + + /* Check the empty string and "\n", because PCRE has an awkward + * special case for "\n" that has complicated interactions + * with start and end anchoring. */ + if (!compare_fixed_input(fsm, pattern, "", md, p) + || !compare_fixed_input(fsm, pattern, "\n", md, p)) { + pcre2_match_data_free(md); + pcre2_code_free(p); + return res; + } + + struct fsm_capture captures[MAX_OVEC_SIZE/2] = { 0 }; + + const size_t pattern_length = strlen(pattern); + if (pattern_length >= max_length) { + max_length = pattern_length + 1; + static size_t max_max_length; + if (max_length > max_max_length) { + fprintf(stderr, "Note: increasing max_length to %zu\n", + pattern_length + 1); + max_max_length = max_length; + if (max_depth < max_length) { + max_depth = max_length + 1; + } + } + } + + struct cmp_pcre_env env = { + .verbosity = (int)verbosity, + .pattern = pattern, + .fsm = fsm, + .captures = captures, + .captures_length = MAX_OVEC_SIZE/2, + .md = md, + .p = p, + .max_steps = max_steps, + .max_depth = max_depth, + .max_match_count = max_match_count, + }; + + if (!fsm_generate_matches(fsm, max_length, cmp_pcre_gen_cb, &env)) { + res = 0; + } + + pcre2_match_data_free(md); + pcre2_code_free(p); + return res; +} +#endif + +/* Note: combined_fsm and fsms[] are non-const because fsm_generate_matches + * calls fsm_trim on them. 
*/ +static int +compare_separate_and_combined(int verbosity, size_t max_length, size_t count, + struct fsm *combined_fsm, const struct fsm_combined_base_pair *bases, + struct fsm **fsms); + +static enum fsm_generate_matches_cb_res +cmp_separate_and_combined_cb(const struct fsm *fsm, + size_t depth, size_t match_count, size_t steps, + const char *input, size_t input_length, + fsm_state_t end_state, void *opaque); + +static int +build_and_check_multi(const char *input) +{ + int res = EXIT_FAILURE; + const int verbosity = get_env_config(0, "VERBOSITY"); +#define MAX_PATTERNS 8 +#define MAX_PATTERN_LEN 256 + char patterns[MAX_PATTERNS][MAX_PATTERN_LEN] = { 0 }; + size_t count = 0; + const size_t len = strlen(input); + size_t max_length = get_env_config(DEF_MAX_LENGTH, "MAX_LENGTH"); + INIT_TIMERS(); + + /* if nonzero, apply a timeout to the combined FSM det/min below */ + const size_t timeout = get_env_config(0, "TIMEOUT"); + + if (timeout > 0) { + if (TRACK_TIMES == 0) { + fprintf(stderr, "\n\n\n\n\nError: src/libfsm/internal.h:TRACK_TIMES needs to be nonzero for this use case, exiting.\n\n\n\n\n"); + exit(EXIT_FAILURE); + } else { + static bool printed_timeout_msg; + if (!printed_timeout_msg) { + fprintf(stderr, "Using timeout of %zu msec for fsm_determinise/fsm_minimise on combined FSM.\n", + timeout); + printed_timeout_msg = true; + } + } + } + + size_t i, j; + for (i = 0, j = 0; i < len; i++) { + const char c = input[i]; + if (c == '\n' || c == '\r') { + if (j > max_length) { + max_length = j; + } + count++; + if (count == MAX_PATTERNS) { + /* ignore: too many patterns */ + return EXIT_SUCCESS; + } + j = 0; + } else { + patterns[count][j] = c; + j++; + if (j == MAX_PATTERN_LEN) { + /* ignore: pattern too long */ + return EXIT_SUCCESS; + } + } + } + if (j > 0) { count++; } + + if (count == 1) { + /* multi mode with only one pattern is pointless */ + return EXIT_SUCCESS; + } + + struct re_err err; + const enum re_flags flags = 0; + + /* build each regex, combining 
them and keeping track of capture offsets */ + struct fsm *fsms[count]; + struct fsm *fsms_cp[count]; + struct fsm_combined_base_pair bases[count]; + struct fsm *combined_fsm = NULL; + for (size_t i = 0; i < count; i++) { + fsms[i] = NULL; + fsms_cp[i] = NULL; + + bases[i].state = 0; + bases[i].capture = 0; + } + + /* compile each individually */ + /* FIXME: apply and check endids */ + for (size_t i = 0; i < count; i++) { + if (verbosity > 1) { + fprintf(stderr, "%s: compiling \"%s\"\n", + __func__, patterns[i]); + } + + struct scanner s = { + .str = (const uint8_t *)patterns[i], + .size = strlen(patterns[i]), + }; + + struct fsm *fsm = re_comp(RE_PCRE, scanner_next, &s, &fsm_options, flags, &err); + if (fsm == NULL) { + res = EXIT_SUCCESS; /* invalid regex, so skip this batch */ + goto cleanup; + } + + /* set endid to associate each FSM with its pattern */ + if (!fsm_setendid(fsm, (fsm_end_id_t)i)) { + goto cleanup; + } + + char label_buf[100]; + snprintf(label_buf, 100, "single_determisise_%zu", i); + + TIME(&pre); + if (!fsm_determinise(fsm)) { + goto cleanup; + } + TIME(&post); + DIFF_MSEC(label_buf, pre, post, NULL); + + snprintf(label_buf, 100, "single_minimise_%zu", i); + TIME(&pre); + if (!fsm_minimise(fsm)) { + goto cleanup; + } + TIME(&post); + DIFF_MSEC(label_buf, pre, post, NULL); + + if (verbosity > 4) { + char tag_buf[16] = { 0 }; + snprintf(tag_buf, sizeof(tag_buf), "fsm[%zu]", i); + + fprintf(stderr, "==== fsm[%zu]\n", i); + fsm_print_fsm(stderr, fsm); + fsm_capture_dump(stderr, tag_buf, fsm); + } + + fsms[i] = fsm; + fsms_cp[i] = fsm_clone(fsm); /* save a copy for comparison */ + } + + combined_fsm = fsm_union_array(count, fsms, bases); + assert(combined_fsm != NULL); + if (verbosity > 1) { + fprintf(stderr, "%s: combined_fsm: %d states after fsm_union_array\n", + __func__, fsm_countstates(combined_fsm)); + } + if (verbosity > 1) { + for (size_t i = 0; i < count; i++) { + fprintf(stderr, "%s: base[%zu]: state %d, capture %u\n", + __func__, i, 
bases[i].state, bases[i].capture); + } + } + + TIME(&pre); + if (!fsm_determinise(combined_fsm)) { + goto cleanup; + } + TIME(&post); + size_t timeout_accum = 0; + if (timeout != 0) { + if (verbosity > 1) { + DIFF_MSEC_ALWAYS("combined_determinise", pre, post, &timeout_accum); + } else { + DIFF_MSEC("combined_determinise", pre, post, &timeout_accum); + } + assert(timeout_accum < timeout); + timeout_accum = 0; + } + + const unsigned states_after_determinise = fsm_countstates(combined_fsm); + if (verbosity > 1) { + fprintf(stderr, "%s: combined_fsm: %d states after determinise\n", + __func__, states_after_determinise); + } + + TIME(&pre); + if (!fsm_minimise(combined_fsm)) { + goto cleanup; + } + TIME(&post); + if (timeout != 0) { + if (verbosity > 1) { + DIFF_MSEC_ALWAYS("combined_minimise", pre, post, &timeout_accum); + } else { + DIFF_MSEC("combined_minimise", pre, post, &timeout_accum); + } + assert(timeout_accum < timeout); + timeout_accum = 0; + } + + const unsigned states_after_minimise = fsm_countstates(combined_fsm); + if (verbosity > 1) { + fprintf(stderr, "%s: combined_fsm: %d states after minimise\n", + __func__, states_after_minimise); + } + + if (verbosity > 4) { + fprintf(stderr, "==== combined\n"); + fsm_print_fsm(stderr, combined_fsm); + fsm_capture_dump(stderr, "combined", combined_fsm); + } + + res = compare_separate_and_combined(verbosity, max_length, + count, combined_fsm, bases, (struct fsm **)fsms_cp); + + for (i = 0; i < count; i++) { + fsm_free(fsms_cp[i]); + } + fsm_free(combined_fsm); + + if (res == EXIT_SUCCESS) { + static size_t pass_count; + if (verbosity == 1) { + fprintf(stderr, "%s: pass: %zu, %zu patterns\n", + __func__, ++pass_count, count); + } else if (verbosity > 1) { + fprintf(stderr, "%s: pass: %zu, %zu patterns\n", + __func__, ++pass_count, count); + for (i = 0; i < count; i++) { + fprintf(stderr, " -- %zu: \"%s\"\n", + i, patterns[i]); + } + } + } + + return res; + +cleanup: + for (i = 0; i < count; i++) { + if (fsms[i] != 
NULL) { + fsm_free(fsms[i]); + } + if (fsms_cp[i] != NULL) { + fsm_free(fsms_cp[i]); + } + } + if (combined_fsm != NULL) { + fsm_free(combined_fsm); + } + return res; +} + +struct cmp_combined_env { + bool ok; + int verbosity; + size_t count; + struct fsm *combined_fsm; + const struct fsm_combined_base_pair *bases; + size_t current_i; + struct fsm **fsms; + size_t max_depth; + size_t max_steps; + size_t max_match_count; +}; + +static enum fsm_generate_matches_cb_res +cmp_combined_with_separate_cb(const struct fsm *fsm, + size_t depth, size_t match_count, size_t steps, + const char *input, size_t input_length, + fsm_state_t end_state, void *opaque); + +static int +compare_separate_and_combined(int verbosity, size_t max_length, size_t count, + struct fsm *combined_fsm, const struct fsm_combined_base_pair *bases, + struct fsm **fsms) +{ + const size_t max_steps = get_env_config(DEF_MAX_STEPS, "MAX_STEPS"); + const size_t max_depth = get_env_config(DEF_MAX_DEPTH, "MAX_DEPTH"); + const size_t max_match_count = get_env_config(DEF_MAX_MATCH_COUNT, "MAX_MATCH_COUNT"); + + struct cmp_combined_env env = { + .ok = true, + .verbosity = verbosity, + .count = count, + .combined_fsm = combined_fsm, + .bases = bases, + .fsms = fsms, + .max_steps = max_steps, + .max_depth = max_depth, + .max_match_count = max_match_count, + }; + + /* For each individual fsm, generate matching inputs and check that + * they match with the same captures in the combined fsm. */ + for (env.current_i = 0; env.current_i < count; env.current_i++) { + if (!fsm_generate_matches(env.fsms[env.current_i], max_length, + cmp_separate_and_combined_cb, &env)) { + env.ok = false; + } + if (!env.ok) { + break; + } + } + env.current_i = (size_t)-1; + + /* Also go in the other direction, generating matches with + * combined and check the individual ones match as expected. 
*/ + if (env.ok) { + if (!fsm_generate_matches(env.combined_fsm, max_length, + cmp_combined_with_separate_cb, &env)) { + env.ok = false; + } + } + + return env.ok ? EXIT_SUCCESS : EXIT_FAILURE; +} + +static enum fsm_generate_matches_cb_res +cmp_separate_and_combined_cb(const struct fsm *fsm, + size_t depth, size_t match_count, size_t steps, + const char *input, size_t input_length, + fsm_state_t end_state, void *opaque) +{ + struct cmp_combined_env *env = opaque; + (void)end_state; + + if (steps > env->max_steps) { + return FSM_GENERATE_MATCHES_CB_RES_HALT; + } + + if (depth > env->max_depth) { + return FSM_GENERATE_MATCHES_CB_RES_PRUNE; + } + + if (match_count > env->max_match_count) { + return FSM_GENERATE_MATCHES_CB_RES_HALT; + } + +#define MAX_CAPTURES 256 + struct fsm_capture captures_single[MAX_CAPTURES]; + struct fsm_capture captures_combined[MAX_CAPTURES]; + + const fsm_end_id_t expected_end_id = (fsm_end_id_t)env->current_i; + + const uint8_t *u8_input = (const uint8_t *)input; + fsm_state_t end_state_combined, end_state_single; + + const int res_combined = fsm_exec_with_captures(env->combined_fsm, u8_input, input_length, + &end_state_combined, captures_combined, MAX_CAPTURES); + const int res_single = fsm_exec_with_captures(fsm, u8_input, input_length, + &end_state_single, captures_single, MAX_CAPTURES); + + if (res_combined != res_single) { + env->ok = false; + if (env->verbosity > 0) { + fprintf(stderr, "%s: res_combined %d != res_single %d\n", + __func__, res_combined, res_single); + } + return FSM_GENERATE_MATCHES_CB_RES_HALT; + } + + fsm_end_id_t id_buf_combined[MAX_PATTERNS]; + size_t written_combined = 0; + if (res_combined > 0) { + const size_t exp_written = fsm_getendidcount(env->combined_fsm, end_state_combined); + assert(exp_written <= env->count); + const enum fsm_getendids_res gres = fsm_getendids(env->combined_fsm, + end_state_combined, MAX_PATTERNS, id_buf_combined, &written_combined); + assert(gres == FSM_GETENDIDS_FOUND); + 
assert(written_combined == exp_written); + } + + /* we got here, so we have a match */ + assert(res_single > 0); + + if (env->verbosity > 3) { + fprintf(stderr, "%s: res %d (single and combined)\n", __func__, res_single); + } + + /* Check that the end state's endid for the single DFA is among the + * endids for the combined DFA's end state. */ + assert(fsm_getendidcount(fsm, end_state_single) == 1); + assert(fsm_getendidcount(env->combined_fsm, end_state_combined) <= env->count); + + fsm_end_id_t id_buf_single[1]; + size_t written; + const enum fsm_getendids_res gres = fsm_getendids(fsm, + end_state_single, 1, id_buf_single, &written); + assert(gres == FSM_GETENDIDS_FOUND); + assert(written == 1); + assert(id_buf_single[0] == expected_end_id); + + bool found_single_id_in_combined = false; + for (size_t i = 0; i < written_combined; i++) { + if (id_buf_combined[i] == expected_end_id) { + found_single_id_in_combined = true; + break; + } + } + assert(found_single_id_in_combined); + + bool matching = true; + const unsigned base = env->bases[env->current_i].capture; + assert(base < MAX_CAPTURES); + for (int i = 0; i < res_single; i++) { + if (env->verbosity > 3) { + fprintf(stderr, "%d/%d: single [%ld, %ld] <-> combined [%ld, %ld]\n", + i, res_single, + captures_single[i].pos[0], captures_single[i].pos[1], + captures_combined[i + base].pos[0], captures_combined[i + base].pos[1]); + } + if ((captures_single[i].pos[0] != captures_combined[i + base].pos[0]) || + (captures_single[i].pos[1] != captures_combined[i + base].pos[1])) { + matching = false; + } + } + + if (!matching) { + for (int i = 0; i < res_single; i++) { + fprintf(stderr, "%d/%d: single [%ld, %ld] <-> combined [%ld, %ld]\n", + i, res_single, + captures_single[i].pos[0], captures_single[i].pos[1], + captures_combined[i + base].pos[0], captures_combined[i + base].pos[1]); + } + env->ok = false; + return FSM_GENERATE_MATCHES_CB_RES_HALT; + } + + return FSM_GENERATE_MATCHES_CB_RES_CONTINUE; +} + +static enum 
fsm_generate_matches_cb_res +cmp_combined_with_separate_cb(const struct fsm *fsm, + size_t depth, size_t match_count, size_t steps, + const char *input, size_t input_length, + fsm_state_t end_state, void *opaque) +{ + /* We have an input that matched the combined DFA, + * use the set of end IDs to check which of the + * single DFAs it should/should not match, and check + * the endid behavior. */ + + struct cmp_combined_env *env = opaque; + + if (steps > env->max_steps) { + return FSM_GENERATE_MATCHES_CB_RES_HALT; + } + + if (depth > env->max_depth) { + return FSM_GENERATE_MATCHES_CB_RES_PRUNE; + } + + if (match_count > env->max_match_count) { + return FSM_GENERATE_MATCHES_CB_RES_HALT; + } + +#define MAX_CAPTURES 256 + struct fsm_capture captures_single[MAX_CAPTURES]; + struct fsm_capture captures_combined[MAX_CAPTURES]; + + const uint8_t *u8_input = (const uint8_t *)input; + + fsm_state_t end_state_combined; + assert(fsm == env->combined_fsm); + const int res_combined = fsm_exec_with_captures(env->combined_fsm, u8_input, input_length, + &end_state_combined, captures_combined, MAX_CAPTURES); + assert(res_combined > 0); /* we got here, so we have a match */ + assert(end_state_combined == end_state); + + fsm_end_id_t id_buf_combined[MAX_PATTERNS]; + size_t written_combined = 0; + { + const size_t exp_written = fsm_getendidcount(env->combined_fsm, end_state_combined); + assert(exp_written <= env->count); + const enum fsm_getendids_res gres = fsm_getendids(env->combined_fsm, + end_state_combined, MAX_PATTERNS, id_buf_combined, &written_combined); + assert(gres == FSM_GETENDIDS_FOUND); + assert(written_combined == exp_written); + } + + /* For each pattern, check if its endid is in the combined DFA's end state + * endids. If so, it should match, otherwise it should not. 
*/ + for (size_t pattern_i = 0; pattern_i < env->count; pattern_i++) { + const struct fsm *single_fsm = env->fsms[pattern_i]; + bool found = false; + for (size_t endid_i = 0; endid_i < written_combined; endid_i++) { + const fsm_end_id_t endid = id_buf_combined[endid_i]; + if (endid == pattern_i) { + found = true; + break; + } + } + fsm_state_t end_state_single; + + const int res_single = fsm_exec_with_captures(single_fsm, + u8_input, input_length, + &end_state_single, captures_single, MAX_CAPTURES); + + if (found) { + assert(res_single > 0); + fsm_end_id_t id_buf_single[1]; + size_t written; + const enum fsm_getendids_res gres = fsm_getendids(single_fsm, + end_state_single, 1, id_buf_single, &written); + assert(gres == FSM_GETENDIDS_FOUND); + assert(written == 1); + assert(id_buf_single[0] == pattern_i); + + /* check captures */ + bool matching = true; + const unsigned base = env->bases[pattern_i].capture; + assert(base < MAX_CAPTURES); + for (int i = 0; i < res_single; i++) { + if (env->verbosity > 3) { + fprintf(stderr, "%d/%d: single [%ld, %ld] <-> combined [%ld, %ld]\n", + i, res_single, + captures_single[i].pos[0], captures_single[i].pos[1], + captures_combined[i + base].pos[0], captures_combined[i + base].pos[1]); + } + if ((captures_single[i].pos[0] != captures_combined[i + base].pos[0]) || + (captures_single[i].pos[1] != captures_combined[i + base].pos[1])) { + matching = false; + } + } + + if (!matching) { + for (int i = 0; i < res_single; i++) { + fprintf(stderr, "%d/%d: single [%ld, %ld] <-> combined [%ld, %ld]\n", + i, res_single, + captures_single[i].pos[0], captures_single[i].pos[1], + captures_combined[i + base].pos[0], captures_combined[i + base].pos[1]); + } + env->ok = false; + return FSM_GENERATE_MATCHES_CB_RES_HALT; + } + } else { + assert(res_single == 0); /* no match */ + } + } + + return FSM_GENERATE_MATCHES_CB_RES_CONTINUE; +} + + #define DEF_MAX_SHUFFLE 10 #define DEF_MAX_MINIMISE_ORACLE_STATE_COUNT 1000 @@ -176,7 +1371,7 @@ 
shuffle_minimise(const char *pattern) .offset = 0 }; - fsm = re_comp(RE_PCRE, scanner_next, &s, &opt, RE_MULTI, &err); + fsm = re_comp(RE_PCRE, scanner_next, &s, &fsm_options, RE_MULTI, &err); if (fsm == NULL) { /* ignore invalid regexp syntax, etc. */ @@ -335,23 +1530,108 @@ fuzz_all_print_functions(FILE *f, const char *pattern, bool det, bool min, const return EXIT_SUCCESS; } -#define MAX_FUZZER_DATA (64 * 1024) -static uint8_t data_buf[MAX_FUZZER_DATA + 1]; +static int +build_and_test_idempotent_det_and_min(const char *pattern) +{ + const int verbosity = get_env_config(0, "VERBOSITY"); + assert(pattern != NULL); + + struct re_err err; + struct fsm *fsm; + const size_t length = strlen(pattern); + + struct scanner s = { + .str = (const uint8_t *)pattern, + .size = length, + }; + + fsm = re_comp(RE_PCRE, scanner_next, &s, &fsm_options, RE_MULTI, &err); + if (fsm == NULL) { + return EXIT_SUCCESS; + } + + if (!fsm_determinise(fsm)) { + return EXIT_FAILURE; + } + if (verbosity >= 3) { + fprintf(stderr, "=== post_det_a\n"); + fsm_print_fsm(stderr, fsm); + } + const size_t post_det_a = fsm_countstates(fsm); + + if (!fsm_determinise(fsm)) { + return EXIT_FAILURE; + } + if (verbosity >= 3) { + fprintf(stderr, "=== post_det_b\n"); + fsm_print_fsm(stderr, fsm); + } + const size_t post_det_b = fsm_countstates(fsm); + assert(post_det_b == post_det_a); + + if (!fsm_minimise(fsm)) { + return EXIT_FAILURE; + } + if (verbosity >= 3) { + fprintf(stderr, "=== post_min_a\n"); + fsm_print_fsm(stderr, fsm); + fsm_capture_dump(stderr, "post_a", fsm); + } + const size_t post_min_a = fsm_countstates(fsm); + + if (!fsm_minimise(fsm)) { + return EXIT_FAILURE; + } + if (verbosity >= 3) { + fprintf(stderr, "=== post_min_b\n"); + fsm_print_fsm(stderr, fsm); + fsm_capture_dump(stderr, "post_b", fsm); + } + const size_t post_min_b = fsm_countstates(fsm); + assert(post_min_b == post_min_a); + + if (!fsm_determinise(fsm)) { + return EXIT_FAILURE; + } + const size_t post_det_c = 
fsm_countstates(fsm); + assert(post_det_c == post_min_b); + + if (!fsm_minimise(fsm)) { + return EXIT_FAILURE; + } + const size_t post_min_c = fsm_countstates(fsm); + assert(post_min_c == post_det_c); + + fsm_free(fsm); + return EXIT_SUCCESS; +} static enum run_mode get_run_mode(void) { const char *mode = getenv("MODE"); if (mode == NULL) { - return MODE_DEFAULT; + return MODE_REGEX; /* default */ } switch (mode[0]) { - case 'm': return MODE_SHUFFLE_MINIMISE; + case '\0': return MODE_REGEX; /* default */ + case 'r': return MODE_REGEX; + case 's': return MODE_REGEX_SINGLE_ONLY; + case 'm': return MODE_REGEX_MULTI_ONLY; + case 'i': return MODE_IDEMPOTENT_DET_MIN; + case 'M': return MODE_SHUFFLE_MINIMISE; case 'p': return MODE_ALL_PRINT_FUNCTIONS; - case 'd': default: - return MODE_DEFAULT; + fprintf(stderr, "Unrecognized mode '%c', expect one of:\n", mode[0]); + fprintf(stderr, " - r.egex (default)\n"); + fprintf(stderr, " - s.ingle regex only\n"); + fprintf(stderr, " - m.ulti regex only\n"); + fprintf(stderr, " - M.inimisation shuffling\n"); + fprintf(stderr, " - i.dempotent determinise/minimise\n"); + fprintf(stderr, " - p.rint functions\n"); + exit(EXIT_FAILURE); + break; } } @@ -369,12 +1649,87 @@ harness_fuzzer_target(const uint8_t *data, size_t size) size = MAX_FUZZER_DATA; } memcpy(data_buf, data, size); + /* ensure the buffer is 0-terminated */ + data_buf[size] = 0; + + /* truncate to a valid c string */ + size = strlen((const char *)data_buf); + data_buf[size] = 0; + + /* reset for each run */ + allocator_stats.hwm = 0; + + size_t dot_count = 0; + bool has_newline = false; + size_t first_newline; + + for (size_t i = 0; i < size; i++) { + const uint8_t c = data_buf[i]; + if (c == '.') { + dot_count++; + if (dot_count >= 4) { + /* Too many '.'s can lead to a regex that is + * very slow to determinise/minimise, but that + * failure mode is not interesting to this + * particular fuzzer. 
*/ + return EXIT_SUCCESS; + } + } + + if (c == '(') { + /* This triggers an "unreached" assertion in the parser. + * It's already been reported (issue #386), but once the + * fuzzer finds it, it will report it over and over. + * Exit here so that the fuzzer considers it uninteresting. */ + if (size - i >= 3 && 0 == memcmp("(*:", &data_buf[i], 3)) { + return EXIT_SUCCESS; + } + } + + if (c == '\\') { + /* Not supported yet. */ + return EXIT_SUCCESS; + } + + if (c == '\r' || c == '\n') { + if (!has_newline) { + first_newline = i; + } + has_newline = true; + } + } const char *pattern = (const char *)data_buf; switch (get_run_mode()) { - case MODE_DEFAULT: - return build_and_codegen(pattern); + case MODE_REGEX: + if (has_newline) { + return build_and_check_multi(pattern); + } else { + return build_and_check_single(pattern); + } + + case MODE_REGEX_SINGLE_ONLY: + if (has_newline) { + return EXIT_SUCCESS; /* ignore */ + } else { + return build_and_check_single(pattern); + } + + case MODE_REGEX_MULTI_ONLY: + if (has_newline) { + return build_and_check_multi(pattern); + } else { + return EXIT_SUCCESS; /* ignore */ + } + + case MODE_IDEMPOTENT_DET_MIN: + if (has_newline) { + assert(data_buf[first_newline] == '\n' + || data_buf[first_newline] == '\r'); + data_buf[first_newline] = '\0'; + } + return build_and_test_idempotent_det_and_min(pattern); case MODE_SHUFFLE_MINIMISE: return shuffle_minimise(pattern); @@ -390,11 +1745,14 @@ harness_fuzzer_target(const uint8_t *data, size_t size) const bool det = b0 & 0x1; const bool min = b0 & 0x2; const enum fsm_io io_mode = (b0 >> 2) % 3; - + const char *shifted_pattern = (const char *)&data_buf[1]; int res = fuzz_all_print_functions(dev_null, shifted_pattern, det, min, io_mode); return res; } + +default: + assert(!"match fail"); } assert(!"unreached"); diff --git a/include/adt/hashrec.h b/include/adt/hashrec.h index 54816286e..545a20960 100644 --- a/include/adt/hashrec.h +++ b/include/adt/hashrec.h @@ -7,7 +7,7 @@ #ifndef 
ADT_HASHREC_H #define ADT_HASHREC_H -unsigned long +uint64_t hashrec(const void *p, size_t n); #endif diff --git a/include/adt/idmap.h b/include/adt/idmap.h new file mode 100644 index 000000000..504fd382b --- /dev/null +++ b/include/adt/idmap.h @@ -0,0 +1,59 @@ +#ifndef IDMAP_H +#define IDMAP_H + +/* Mapping between one fsm_state_t and a set of + * unsigned IDs. The implementation assumes that both + * IDs are sequentially assigned and don't need a sparse + * mapping -- it will handle 10 -> [1, 3, 47] well, but + * not 1000000 -> [14, 524288, 1073741823]. */ + +#include + +#include "fsm/fsm.h" +#include "fsm/alloc.h" + +struct idmap; /* Opaque handle. */ + +struct idmap * +idmap_new(const struct fsm_alloc *alloc); + +void +idmap_free(struct idmap *m); + +/* Associate a value with a state (if not already present.) + * Returns 1 on success, or 0 on allocation failure. */ +int +idmap_set(struct idmap *m, fsm_state_t state_id, unsigned value); + +/* How many values are associated with an ID? */ +size_t +idmap_get_value_count(const struct idmap *m, fsm_state_t state_id); + +/* Get the values associated with an ID. + * + * Returns 1 on success and writes them into the buffer, in ascending + * order, with the count in *written (if non-NULL). + * + * Returns 0 on error (insufficient buffer space). */ +int +idmap_get(const struct idmap *m, fsm_state_t state_id, + size_t buf_size, unsigned *buf, size_t *written); + +/* Iterator callback. + * Return status indicates whether to continue. */ +typedef int +idmap_iter_fun(fsm_state_t state_id, unsigned value, void *opaque); + +/* Iterate over the ID map. State IDs may be yielded out of order, + * values will be in ascending order. */ +void +idmap_iter(const struct idmap *m, + idmap_iter_fun *cb, void *opaque); + +/* Iterate over the values associated with a single state + * (in ascending order). 
*/ +void +idmap_iter_for_state(const struct idmap *m, fsm_state_t state_id, + idmap_iter_fun *cb, void *opaque); + +#endif diff --git a/include/adt/mappingset.h b/include/adt/mappingset.h deleted file mode 100644 index d4f91105e..000000000 --- a/include/adt/mappingset.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2019 Shannon F. Stewman - * - * See LICENCE for the full copyright terms. - */ - -#ifndef ADT_MAPPINGSET_H -#define ADT_MAPPINGSET_H - -struct fsm_alloc; -struct mapping_set; -struct mapping; - -struct mapping_iter { - struct hashset_iter iter; -}; - -struct mapping_set * -mapping_set_create(const struct fsm_alloc *a, - unsigned long (*hash)(const struct mapping *a), - int (*cmp)(const void *a, const void *b)); - -void -mapping_set_free(struct mapping_set *set); - -struct mapping * -mapping_set_add(struct mapping_set *set, struct mapping *item); - -struct mapping * -mapping_set_contains(const struct mapping_set *set, const struct mapping *item); - -void -mapping_set_clear(struct mapping_set *set); - -struct mapping * -mapping_set_first(const struct mapping_set *set, struct mapping_iter *it); - -struct mapping * -mapping_set_next(struct mapping_iter *it); - -#endif - diff --git a/include/adt/stateset.h b/include/adt/stateset.h index 83e835467..becd263ad 100644 --- a/include/adt/stateset.h +++ b/include/adt/stateset.h @@ -7,6 +7,8 @@ #ifndef ADT_STATESET_H #define ADT_STATESET_H +#include + struct set; struct fsm_alloc; struct state_set; @@ -72,7 +74,7 @@ state_set_rebase(struct state_set **set, fsm_state_t base); void state_set_replace(struct state_set **set, fsm_state_t old, fsm_state_t new); -unsigned long +uint64_t state_set_hash(const struct state_set *set); #endif diff --git a/include/fsm/capture.h b/include/fsm/capture.h index d3c1aaa54..0fd418925 100644 --- a/include/fsm/capture.h +++ b/include/fsm/capture.h @@ -23,41 +23,32 @@ struct fsm_capture { size_t pos[2]; }; -/* How many captures does the FSM use? 
*/ +/* What is the max capture ID an FSM uses? */ unsigned -fsm_countcaptures(const struct fsm *fsm); +fsm_capture_ceiling(const struct fsm *fsm); /* Does a specific state have any capture actions? */ int fsm_capture_has_capture_actions(const struct fsm *fsm, fsm_state_t state); -/* Set a capture path on an FSM. This means that during matching, the - * portion of a match between the path's START and END states will be - * captured. As the FSM is transformed (determinisation, minimisation, - * unioning, etc.), the path will be converted to refer to the pair(s) - * of new states instead. If the path's END state is no longer reachable - * from its START state, then the capture path will be ignored. - * Multiple instances of the same capture_id and path are ignored. */ -int -fsm_capture_set_path(struct fsm *fsm, unsigned capture_id, - fsm_state_t start, fsm_state_t end); - -/* Increase the base capture ID for all captures in an fsm. - * This could be used before combining multiple FSMs -- for - * example, before unioning a and b, where a has 3 captures - * and b has 2, b may be rebase'd to 3 -- so a has captures - * 0-2 and b has 3-4. */ -void -fsm_capture_rebase_capture_id(struct fsm *fsm, unsigned base); +/* Allocate a capture buffer with enough space for + * the current FSM's captures. + * + * This is provided for convenience -- the necessary array + * count can be checked with fsm_capture_ceiling, and then + * the buffer can be allocated directly. */ +struct fsm_capture * +fsm_capture_alloc_capture_buffer(const struct fsm *fsm); -/* Same, but for capture action states. */ +/* Free a capture buffer. */ void -fsm_capture_rebase_capture_action_states(struct fsm *fsm, fsm_state_t base); +fsm_capture_free_capture_buffer(const struct fsm *fsm, struct fsm_capture *capture_buffer); -/* Allocate a capture buffer with enough space for - * the current FSM's captures. 
*/ -struct fsm_capture * -fsm_capture_alloc(const struct fsm *fsm); +/* Note that a capture is active for a particular end state. + * Using this for a non-end state is an unchecked error. */ +int +fsm_capture_set_active_for_end(struct fsm *fsm, + unsigned capture_id, fsm_state_t end_state); #ifndef NDEBUG #include diff --git a/include/fsm/fsm.h b/include/fsm/fsm.h index 7c3883749..b269a05db 100644 --- a/include/fsm/fsm.h +++ b/include/fsm/fsm.h @@ -192,6 +192,10 @@ fsm_findmode(const struct fsm *fsm, fsm_state_t state, unsigned int *freq); void fsm_setend(struct fsm *fsm, fsm_state_t state, int end); +/* Associate a numeric ID with a single end state in an FSM. */ +int +fsm_setendid_state(struct fsm *fsm, fsm_state_t s, fsm_end_id_t id); + /* Associate a numeric ID with the end states in an fsm. * This can be used to track which of the original fsms matched * input when multiple fsms are combined. @@ -462,8 +466,21 @@ fsm_shortest(const struct fsm *fsm, * The given FSM is expected to be a DFA. */ int -fsm_exec(const struct fsm *fsm, int (*fsm_getc)(void *opaque), void *opaque, - fsm_state_t *end, struct fsm_capture *captures); +fsm_exec(const struct fsm *fsm, + int (*fsm_getc)(void *opaque), void *opaque, fsm_state_t *end); + +/* Same as fsm_exec, but also populate information about captures if + * *captures is non-NULL and capture metadata is available for the DFA. + * Captures is expected to be large enough to fit captures from the FSM. + * To check, use `fsm_capture_ceiling`. + * + * The current implementation requires all input to be buffered ahead of + * time, so this takes a pointer to an input array rather than a + * character iterator. */ +int +fsm_exec_with_captures(const struct fsm *fsm, const unsigned char *input, + size_t input_length, fsm_state_t *end, + struct fsm_capture *captures, size_t capture_buf_length); /* * Callbacks which may be passed to fsm_exec(). 
These are conveniences for diff --git a/include/re/re.h b/include/re/re.h index deab6caed..ab5f09b39 100644 --- a/include/re/re.h +++ b/include/re/re.h @@ -20,16 +20,18 @@ enum re_dialect { }; enum re_flags { - RE_ICASE = 1 << 0, - RE_TEXT = 1 << 1, - RE_MULTI = 1 << 2, - RE_REVERSE = 1 << 3, - RE_SINGLE = 1 << 4, /* aka PCRE_DOTALL */ - RE_ZONE = 1 << 5, - RE_ANCHORED = 1 << 6, - RE_EXTENDED = 1 << 7, /* PCRE extended mode */ - RE_END_NL = 1 << 8, /* end anchor matches '\n' */ - RE_FLAGS_NONE = 0 + RE_ICASE = 1 << 0, + RE_TEXT = 1 << 1, + RE_MULTI = 1 << 2, + RE_REVERSE = 1 << 3, + RE_SINGLE = 1 << 4, /* aka PCRE_DOTALL */ + RE_ZONE = 1 << 5, + RE_ANCHORED = 1 << 6, + RE_EXTENDED = 1 << 7, /* PCRE extended mode */ + RE_NOCAPTURE = 1 << 8, /* disable captures */ + RE_END_NL = 1 << 9, /* end anchor matches '\n' */ + RE_END_NL_DISABLE = 1 << 10, /* disable end anchor matching '\n' */ + RE_FLAGS_NONE = 0 }; #define RE_ANCHOR (RE_TEXT | RE_MULTI | RE_ZONE) @@ -46,6 +48,8 @@ enum re_errno { RE_EERRNO = 1 | RE_MISC, RE_EBADDIALECT = 2 | RE_MISC, RE_EBADGROUP = 3 | RE_MISC, + RE_EUNSUPCAPTUR = 4 | RE_MISC, + RE_EUNSUPPPCRE = 5 | RE_MISC, RE_ENEGRANGE = 0 | RE_MARK | RE_GROUP, RE_ENEGCOUNT = 1 | RE_MARK | RE_GROUP, diff --git a/src/adt/Makefile b/src/adt/Makefile index 05199f2dc..6fae4e7ca 100644 --- a/src/adt/Makefile +++ b/src/adt/Makefile @@ -2,6 +2,7 @@ SRC += src/adt/alloc.c SRC += src/adt/bitmap.c +SRC += src/adt/idmap.c SRC += src/adt/internedstateset.c SRC += src/adt/priq.c SRC += src/adt/path.c @@ -19,12 +20,10 @@ CFLAGS.${src} += -I src # XXX: for internal.h DFLAGS.${src} += -I src # XXX: for internal.h .endfor -# not all concrete set interfaces use all static functions from set.inc -.if ${CC:T:Mgcc*} || ${CC:T:Mclang*} -.for src in ${SRC:Msrc/adt/stateset.c} ${SRC:Msrc/adt/tupleset.c} ${SRC:Msrc/adt/edgeset.c} -CFLAGS.${src} += -Wno-unused-function +.for src in ${SRC:Msrc/adt/siphash.c} ${SRC:Msrc/adt/edgeset.c} ${SRC:Msrc/adt/idmap.c} ${SRC:Msrc/adt/ipriq.c} 
${SRC:Msrc/adt/internedstateset.c} +CFLAGS.${src} += -std=c99 # XXX: for internal.h +DFLAGS.${src} += -std=c99 # XXX: for internal.h .endfor -.endif # I want to assert on things which are currently true for this platform, # but not true in general. diff --git a/src/adt/edgeset.c b/src/adt/edgeset.c index c718727ca..9658213c8 100644 --- a/src/adt/edgeset.c +++ b/src/adt/edgeset.c @@ -11,6 +11,7 @@ #include #define LOG_BITSET 0 +#define LOG_BSEARCH 0 #include "libfsm/internal.h" /* XXX: for allocating struct fsm_edge, and the edges array */ @@ -184,6 +185,100 @@ edge_set_advise_growth(struct edge_set **pset, const struct fsm_alloc *alloc, return 1; } +enum fsp_res { + FSP_FOUND_INSERT_POSITION, + FSP_FOUND_VALUE_PRESENT, +}; + +/* Use binary search to find the first position N where set->groups[N].to >= state, + * which includes the position immediately following the last entry. Return an enum + * which indicates whether state is already present. */ +static enum fsp_res +find_state_position(const struct edge_set *set, fsm_state_t state, size_t *dst) +{ + size_t lo = 0, hi = set->count; + if (LOG_BSEARCH) { + fprintf(stderr, "%s: looking for %d in %p (count %zu)\n", + __func__, state, (void *)set, set->count); + } + +#if EXPENSIVE_CHECKS + /* invariant: input is unique and sorted */ + for (size_t i = 1; i < set->count; i++) { + assert(set->groups[i - 1].to < set->groups[i].to); + } +#endif + + if (set->count == 0) { + if (LOG_BSEARCH) { + fprintf(stderr, "%s: empty, returning 0\n", __func__); + } + *dst = 0; + return FSP_FOUND_INSERT_POSITION; + } else { + if (LOG_BSEARCH) { + fprintf(stderr, "%s: fast path: looking for %d, set->groups[last].to %d\n", + __func__, state, set->groups[hi - 1].to); + } + + /* Check the last entry so we can append in constant time. 
*/ + const fsm_state_t last = set->groups[hi - 1].to; + if (state > last) { + *dst = hi; + return FSP_FOUND_INSERT_POSITION; + } else if (state == last) { + *dst = hi - 1; + return FSP_FOUND_VALUE_PRESENT; + } + } + + size_t mid; + while (lo < hi) { /* lo <= mid < hi */ + mid = lo + (hi - lo)/2; /* avoid overflow */ + const struct edge_group *eg = &set->groups[mid]; + const fsm_state_t cur = eg->to; + if (LOG_BSEARCH) { + fprintf(stderr, "%s: lo %zu, hi %zu, mid %zu, cur %d, looking for %d\n", + __func__, lo, hi, mid, cur, state); + } + + if (state == cur) { + *dst = mid; + return FSP_FOUND_VALUE_PRESENT; + } else if (state > cur) { + lo = mid + 1; + if (LOG_BSEARCH) { + fprintf(stderr, "%s: new lo %zd\n", __func__, lo); + } + + /* Update mid if we're about to halt, because we're looking + * for the first position >= state, not the last position <=. */ + if (lo == hi) { + mid = lo; + if (LOG_BSEARCH) { + fprintf(stderr, "%s: special case, updating mid to %zd\n", __func__, mid); + } + } + } else if (state < cur) { + hi = mid; + if (LOG_BSEARCH) { + fprintf(stderr, "%s: new hi %zd\n", __func__, hi); + } + } + } + + if (LOG_BSEARCH) { + fprintf(stderr, "%s: halting at %zd (looking for %d, cur %d)\n", + __func__, mid, state, set->groups[mid].to); + } + + /* dst is now the first position > state (== case is handled above), + * which may be one past the end of the array. 
*/ + assert(mid == set->count || set->groups[mid].to > state); + *dst = mid; + return FSP_FOUND_INSERT_POSITION; +} + int edge_set_add_bulk(struct edge_set **pset, const struct fsm_alloc *alloc, uint64_t symbols[256/64], fsm_state_t state) @@ -223,30 +318,24 @@ edge_set_add_bulk(struct edge_set **pset, const struct fsm_alloc *alloc, assert(set->count <= set->ceil); #if LOG_BITSET - fprintf(stderr, " -- edge_set_add: symbols [0x%lx, 0x%lx, 0x%lx, 0x%lx] -> state %d on %p\n", - symbols[0], symbols[1], symbols[2], symbols[3], - state, (void *)set); + fprintf(stderr, " -- edge_set_add: symbols [0x%lx, 0x%lx, 0x%lx, 0x%lx] -> state %d on %p\n", + symbols[0], symbols[1], symbols[2], symbols[3], + state, (void *)set); #endif - /* Linear search for a group with the same destination - * state, or the position where that group would go. */ - for (i = 0; i < set->count; i++) { + switch (find_state_position(set, state, &i)) { + case FSP_FOUND_VALUE_PRESENT: + assert(i < set->count); eg = &set->groups[i]; - - if (eg->to == state) { - /* This API does not indicate whether that - * symbol -> to edge was already present. 
*/ - size_t i; - for (i = 0; i < 256/64; i++) { - eg->symbols[i] |= symbols[i]; - } - dump_edge_set(set); - return 1; - } else if (eg->to > state) { - break; /* will shift down and insert below */ - } else { - continue; + for (i = 0; i < 256/64; i++) { + eg->symbols[i] |= symbols[i]; } + dump_edge_set(set); + return 1; + + break; + case FSP_FOUND_INSERT_POSITION: + break; /* continue below */ } /* insert/append at i */ diff --git a/src/adt/hashrec.c b/src/adt/hashrec.c index 6a341710b..7348cbc38 100644 --- a/src/adt/hashrec.c +++ b/src/adt/hashrec.c @@ -23,7 +23,7 @@ static const unsigned char hashk[] = { 0x14, 0xa8, 0xff, 0x36, 0x15, 0x16, 0x2c, 0xf7, 0xf4, 0xce, 0xb8, 0x66, 0x74, 0xf4, 0x3d, 0x64, }; -unsigned long +uint64_t hashrec(const void *p, size_t n) { uint64_t h = 0; diff --git a/src/adt/idmap.c b/src/adt/idmap.c new file mode 100644 index 000000000..d1a265861 --- /dev/null +++ b/src/adt/idmap.c @@ -0,0 +1,396 @@ +/* + * Copyright 2021 Scott Vokes + * + * See LICENCE for the full copyright terms. + */ + +#include "adt/idmap.h" + +#include "adt/alloc.h" +#include "adt/hash.h" +#include "adt/u64bitset.h" + +#include +#include +#include + +#define NO_STATE ((fsm_state_t)-1) + +#define DEF_BUCKET_COUNT 4 + +struct idmap { + const struct fsm_alloc *alloc; + unsigned bucket_count; + unsigned buckets_used; + + /* All buckets' values are assumed to be large + * enough to store this value, and they will all + * grow as necessary. */ + unsigned max_value; + + /* Basic linear-probing, add-only hash table. */ + struct idmap_bucket { + fsm_state_t state; /* Key. NO_STATE when empty. */ + + /* values[] is always either NULL or has at least + * max_value + 1 bits; all grow on demand. */ + uint64_t *values; + } *buckets; +}; + +static unsigned +value_words(unsigned max_value) { + if (max_value == 0) { + /* Still allocate one word, for storing 0. 
*/ + return 1; + } else { + return u64bitset_words(max_value); + } +} + +struct idmap * +idmap_new(const struct fsm_alloc *alloc) +{ + struct idmap *res = NULL; + struct idmap_bucket *buckets = NULL; + + res = f_malloc(alloc, sizeof(*res)); + if (res == NULL) { + goto cleanup; + } + + buckets = f_calloc(alloc, + DEF_BUCKET_COUNT, sizeof(buckets[0])); + if (buckets == NULL) { + goto cleanup; + } + + for (size_t i = 0; i < DEF_BUCKET_COUNT; i++) { + buckets[i].state = NO_STATE; + } + + res->alloc = alloc; + res->buckets_used = 0; + res->bucket_count = DEF_BUCKET_COUNT; + res->max_value = 0; + res->buckets = buckets; + + return res; + +cleanup: + f_free(alloc, res); + f_free(alloc, buckets); + return NULL; +} + +void +idmap_free(struct idmap *m) +{ + if (m == NULL) { + return; + } + + for (size_t i = 0; i < m->bucket_count; i++) { + if (m->buckets[i].state == NO_STATE) { + continue; + } + f_free(m->alloc, m->buckets[i].values); + } + + f_free(m->alloc, m->buckets); + f_free(m->alloc, m); +} + +static int +grow_bucket_values(struct idmap *m, unsigned old_words, unsigned new_words) +{ + assert(new_words > old_words); + + for (size_t b_i = 0; b_i < m->bucket_count; b_i++) { + struct idmap_bucket *b = &m->buckets[b_i]; + if (b->state == NO_STATE) { + assert(b->values == NULL); + continue; + } + + uint64_t *nv = f_calloc(m->alloc, + new_words, sizeof(nv[0])); + if (nv == NULL) { + return 0; + } + + for (size_t w_i = 0; w_i < old_words; w_i++) { + nv[w_i] = b->values[w_i]; + } + f_free(m->alloc, b->values); + b->values = nv; + } + return 1; +} + +static int +grow_buckets(struct idmap *m) +{ + const size_t ocount = m->bucket_count; + const size_t ncount = 2*ocount; + assert(ncount > m->bucket_count); + + struct idmap_bucket *nbuckets = f_calloc(m->alloc, + ncount, sizeof(nbuckets[0])); + if (nbuckets == NULL) { + return 0; + } + for (size_t nb_i = 0; nb_i < ncount; nb_i++) { + nbuckets[nb_i].state = NO_STATE; + } + + const size_t nmask = ncount - 1; + + for (size_t ob_i = 0; 
ob_i < ocount; ob_i++) { + const struct idmap_bucket *ob = &m->buckets[ob_i]; + if (ob->state == NO_STATE) { + continue; + } + + const uint64_t h = hash_id(ob->state); + for (size_t nb_i = 0; nb_i < ncount; nb_i++) { + struct idmap_bucket *nb = &nbuckets[(h + nb_i) & nmask]; + if (nb->state == NO_STATE) { + nb->state = ob->state; + nb->values = ob->values; + break; + } else { + assert(nb->state != ob->state); + /* collision */ + continue; + } + } + } + + f_free(m->alloc, m->buckets); + + m->buckets = nbuckets; + m->bucket_count = ncount; + + return 1; +} + +int +idmap_set(struct idmap *m, fsm_state_t state_id, + unsigned value) +{ + assert(state_id != NO_STATE); + + const uint64_t h = hash_id(state_id); + if (value > m->max_value) { + const unsigned ovw = value_words(m->max_value); + const unsigned nvw = value_words(value); + /* If this value won't fit in the existing value + * arrays, then grow them all. We do not track the + * number of bits in each individual array. */ + if (nvw > ovw && !grow_bucket_values(m, ovw, nvw)) { + return 0; + } + m->max_value = value; + } + + assert(m->max_value >= value); + + if (m->buckets_used >= m->bucket_count/2) { + if (!grow_buckets(m)) { + return 0; + } + } + + const uint64_t mask = m->bucket_count - 1; + for (size_t b_i = 0; b_i < m->bucket_count; b_i++) { + struct idmap_bucket *b = &m->buckets[(h + b_i) & mask]; + if (b->state == state_id) { + assert(b->values != NULL); + u64bitset_set(b->values, value); + return 1; + } else if (b->state == NO_STATE) { + b->state = state_id; + assert(b->values == NULL); + + const unsigned vw = value_words(m->max_value); + b->values = f_calloc(m->alloc, + vw, sizeof(b->values[0])); + if (b->values == NULL) { + return 0; + } + m->buckets_used++; + + u64bitset_set(b->values, value); + return 1; + } else { + continue; /* collision */ + } + + } + + assert(!"unreachable"); + return 0; +} + +static const struct idmap_bucket * +get_bucket(const struct idmap *m, fsm_state_t state_id) +{ + const 
uint64_t h = hash_id(state_id); + const uint64_t mask = m->bucket_count - 1; + for (size_t b_i = 0; b_i < m->bucket_count; b_i++) { + const struct idmap_bucket *b = &m->buckets[(h + b_i) & mask]; + if (b->state == NO_STATE) { + return NULL; + } else if (b->state == state_id) { + return b; + } + } + + return NULL; +} + +size_t +idmap_get_value_count(const struct idmap *m, fsm_state_t state_id) +{ + const struct idmap_bucket *b = get_bucket(m, state_id); + if (b == NULL) { + return 0; + } + assert(b->values != NULL); + + size_t res = 0; + const size_t words = value_words(m->max_value); + for (size_t w_i = 0; w_i < words; w_i++) { + const uint64_t w = b->values[w_i]; + /* This could use popcount64(w). */ + if (w == 0) { + continue; + } + for (uint64_t bit = 1; bit; bit <<= 1) { + if (w & bit) { + res++; + } + } + } + + return res; +} + +int +idmap_get(const struct idmap *m, fsm_state_t state_id, + size_t buf_size, unsigned *buf, size_t *written) +{ + const struct idmap_bucket *b = get_bucket(m, state_id); + if (b == NULL) { + if (written != NULL) { + *written = 0; + } + return 1; + } + + size_t buf_offset = 0; + const size_t words = value_words(m->max_value); + for (size_t w_i = 0; w_i < words; w_i++) { + const uint64_t w = b->values[w_i]; + if (w == 0) { + continue; + } + + for (uint64_t b_i = 0; b_i < 64; b_i++) { + if (w & ((uint64_t)1 << b_i)) { + if (buf_offset * sizeof(buf[0]) >= buf_size) { + return 0; + } + buf[buf_offset] = 64*w_i + b_i; + buf_offset++; + } + } + } + + if (written != NULL) { + *written = buf_offset; + } + return 1; +} + +void +idmap_iter(const struct idmap *m, + idmap_iter_fun *cb, void *opaque) +{ + const size_t words = value_words(m->max_value); + + for (size_t b_i = 0; b_i < m->bucket_count; b_i++) { + const struct idmap_bucket *b = &m->buckets[b_i]; + if (b->state == NO_STATE) { + continue; + } + + for (size_t w_i = 0; w_i < words; w_i++) { + const uint64_t w = b->values[w_i]; + if (w == 0) { + continue; + } + for (uint64_t b_i = 0; b_i < 
64; b_i++) { + if (w & ((uint64_t)1 << b_i)) { + const unsigned v = 64*w_i + b_i; + if (!cb(b->state, v, opaque)) { + return; + } + } + } + } + } +} + +void +idmap_iter_for_state(const struct idmap *m, fsm_state_t state_id, + idmap_iter_fun *cb, void *opaque) +{ + const size_t words = value_words(m->max_value); + const struct idmap_bucket *b = get_bucket(m, state_id); + if (b == NULL) { + return; + } + + for (size_t w_i = 0; w_i < words; w_i++) { + const uint64_t w = b->values[w_i]; + if (w == 0) { + continue; + } + /* if N contiguous bits are all zero, skip them all at once */ +#define BLOCK_BITS 16 + uint64_t block = ((uint64_t)1 << BLOCK_BITS) - 1; + size_t block_count = 0; + + uint64_t b_i = 0; + while (b_i < 64) { + if ((w & block) == 0) { + block <<= BLOCK_BITS; + b_i += BLOCK_BITS; + continue; + } + + if (w & ((uint64_t)1 << b_i)) { + const unsigned v = 64*w_i + b_i; + if (!cb(b->state, v, opaque)) { + return; + } + block_count++; + } + b_i++; + block <<= 1; + } + +#define CHECK 0 +#if CHECK + size_t check_count = 0; + for (uint64_t b_i = 0; b_i < 64; b_i++) { + if (w & ((uint64_t)1 << b_i)) { + check_count++; + } + } + assert(block_count == check_count); +#endif + } +} diff --git a/src/adt/stateset.c b/src/adt/stateset.c index c1cff9933..fa3d0c54a 100644 --- a/src/adt/stateset.c +++ b/src/adt/stateset.c @@ -15,6 +15,11 @@ #include #include +/* This is used here because the calls to + * state_set_contains change the order of growth. */ +#include + + /* * TODO: now fsm_state_t is a numeric index, this could be a dynamically * allocated bitmap, instead of a set.inc's array of items. @@ -44,8 +49,8 @@ struct state_set { const struct fsm_alloc *alloc; fsm_state_t *a; - size_t i; - size_t n; + size_t i; /* used */ + size_t n; /* ceil */ }; int @@ -138,7 +143,8 @@ state_set_cmp(const struct state_set *a, const struct state_set *b) } /* - * Return where an item would be, if it were inserted + * Return where an item would be, if it were inserted. 
+ * When insertion would append this returns one past the array. */ static size_t state_set_search(const struct state_set *set, fsm_state_t state) @@ -150,6 +156,11 @@ state_set_search(const struct state_set *set, fsm_state_t state) assert(!IS_SINGLETON(set)); assert(set->a != NULL); + /* fast path: append case */ + if (set->i > 0 && state > set->a[set->i - 1]) { + return set->i; + } + start = mid = 0; end = set->i; @@ -161,6 +172,12 @@ state_set_search(const struct state_set *set, fsm_state_t state) end = mid; } else if (r > 0) { start = mid + 1; + /* update mid if we're about to halt, because + * we're looking for the first position >= state, + * not the last position <= */ + if (start == end) { + mid = start; + } } else { return mid; } @@ -242,7 +259,7 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc, */ if (!state_set_empty(set)) { i = state_set_search(set, state); - if (set->a[i] == state) { + if (i < set->i && set->a[i] == state) { return 1; } } @@ -261,11 +278,7 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc, set->n *= 2; } - if (state_set_cmpval(state, set->a[i]) > 0) { - i++; - } - - if (i <= set->i) { + if (i < set->i) { memmove(&set->a[i + 1], &set->a[i], (set->i - i) * (sizeof *set->a)); } @@ -276,6 +289,8 @@ state_set_add(struct state_set **setp, const struct fsm_alloc *alloc, set->i = 1; } + /* This assert can be pretty expensive in -O0 but in -O3 it has very + * little impact on the overall runtime. 
*/ assert(state_set_contains(set, state)); return 1; @@ -470,7 +485,7 @@ state_set_remove(struct state_set **setp, fsm_state_t state) } i = state_set_search(set, state); - if (set->a[i] == state) { + if (i < set->i && set->a[i] == state) { if (i < set->i) { memmove(&set->a[i], &set->a[i + 1], (set->i - i - 1) * (sizeof *set->a)); } @@ -478,7 +493,9 @@ state_set_remove(struct state_set **setp, fsm_state_t state) set->i--; } +#if EXPENSIVE_CHECKS assert(!state_set_contains(set, state)); +#endif } int @@ -524,7 +541,7 @@ state_set_contains(const struct state_set *set, fsm_state_t state) } i = state_set_search(set, state); - if (set->a[i] == state) { + if (i < set->i && set->a[i] == state) { return 1; } @@ -659,7 +676,7 @@ state_set_replace(struct state_set **setp, fsm_state_t old, fsm_state_t new) } } -unsigned long +uint64_t state_set_hash(const struct state_set *set) { if (set == NULL) { diff --git a/src/fsm/main.c b/src/fsm/main.c index 8e668b779..192ab7de3 100644 --- a/src/fsm/main.c +++ b/src/fsm/main.c @@ -599,7 +599,7 @@ main(int argc, char *argv[]) size_t n; struct state_iter it; - closures = epsilon_closure(fsm); + closures = fsm_epsilon_closure(fsm); if (closures == NULL) { return -1; } @@ -620,7 +620,7 @@ main(int argc, char *argv[]) printf("\n"); } - closure_free(closures, fsm->statecount); + fsm_closure_free(closures, fsm->statecount); return 0; } else { @@ -647,7 +647,7 @@ main(int argc, char *argv[]) f = xopen(argv[0]); - e = fsm_exec(fsm, fsm_fgetc, f, &state, NULL); + e = fsm_exec(fsm, fsm_fgetc, f, &state); fclose(f); } else { @@ -655,7 +655,7 @@ main(int argc, char *argv[]) s = argv[i]; - e = fsm_exec(fsm, fsm_sgetc, &s, &state, NULL); + e = fsm_exec(fsm, fsm_sgetc, &s, &state); } if (e != 1) { diff --git a/src/libfsm/Makefile b/src/libfsm/Makefile index 9af51a5a4..bfa8e67db 100644 --- a/src/libfsm/Makefile +++ b/src/libfsm/Makefile @@ -1,6 +1,8 @@ .include "../../share/mk/top.mk" SRC += src/libfsm/capture.c +SRC += src/libfsm/capture_vm.c +SRC += 
src/libfsm/capture_vm_exec.c SRC += src/libfsm/collate.c SRC += src/libfsm/complete.c SRC += src/libfsm/consolidate.c diff --git a/src/libfsm/capture.c b/src/libfsm/capture.c index 806bb3b12..21f32d06e 100644 --- a/src/libfsm/capture.c +++ b/src/libfsm/capture.c @@ -6,31 +6,82 @@ #include -#include "capture_internal.h" +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include "internal.h" +#include "capture.h" +#include "capture_vm_program.h" +#include "capture_log.h" +#include "capture_vm.h" +#include "endids.h" + +#define DEF_PROGRAMS_CEIL 4 + +struct fsm_capture_info { + unsigned max_capture_id; + + /* For particular end states, which captures are active? */ + struct idmap *end_capture_map; + + /* Set of capture resolution programs associated with specific + * end states. */ + struct capvm_program_set { + uint32_t ceil; + uint32_t used; + struct capvm_program **set; + } programs; + + /* For particular end states, which capture programs are + * associtaed with them? 
*/ + struct idmap *end_capvm_program_map; +}; int fsm_capture_init(struct fsm *fsm) { struct fsm_capture_info *ci = NULL; - size_t i; + struct idmap *end_capture_map = NULL; + struct idmap *end_capvm_program_map = NULL; ci = f_calloc(fsm->opt->alloc, 1, sizeof(*ci)); if (ci == NULL) { goto cleanup; } - fsm->capture_info = ci; + end_capture_map = idmap_new(fsm->opt->alloc); + if (end_capture_map == NULL) { + goto cleanup; + } + ci->end_capture_map = end_capture_map; - for (i = 0; i < fsm->statealloc; i++) { - fsm->states[i].has_capture_actions = 0; + end_capvm_program_map = idmap_new(fsm->opt->alloc); + if (end_capvm_program_map == NULL) { + goto cleanup; } + ci->end_capvm_program_map = end_capvm_program_map; + + fsm->capture_info = ci; return 1; cleanup: - if (ci != NULL) { - f_free(fsm->opt->alloc, ci); - } + f_free(fsm->opt->alloc, ci); + idmap_free(end_capture_map); + idmap_free(end_capvm_program_map); return 0; } @@ -41,799 +92,570 @@ fsm_capture_free(struct fsm *fsm) if (ci == NULL) { return; } - f_free(fsm->opt->alloc, ci->buckets); + + idmap_free(ci->end_capture_map); + idmap_free(ci->end_capvm_program_map); + + for (size_t p_i = 0; p_i < ci->programs.used; p_i++) { + fsm_capvm_program_free(fsm->opt->alloc, ci->programs.set[p_i]); + } + f_free(fsm->opt->alloc, ci->programs.set); + f_free(fsm->opt->alloc, ci); fsm->capture_info = NULL; } unsigned -fsm_countcaptures(const struct fsm *fsm) +fsm_capture_ceiling(const struct fsm *fsm) { - (void)fsm; if (fsm->capture_info == NULL) { return 0; } - if (fsm->capture_info->buckets_used == 0) { - return 0; - } - /* check actual */ #if EXPENSIVE_CHECKS - { - struct fsm_capture_info *ci = fsm->capture_info; - size_t i; - for (i = 0; i < ci->bucket_count; i++) { - struct fsm_capture_action_bucket *b = &ci->buckets[i]; - if (b->state == CAPTURE_NO_STATE) { /* empty */ - continue; - } - assert(ci->max_capture_id >= b->action.id); + /* check actual */ + unsigned res = 0; + for (size_t i = 0; i < 
fsm->capture_info->programs.used; i++) { + const unsigned id = fsm_capvm_program_get_max_capture_id(fsm->capture_info->programs.set[i]); + if (id > res) { + res = id; } } + assert(res == fsm->capture_info->max_capture_id); #endif return fsm->capture_info->max_capture_id + 1; } +struct fsm_capture * +fsm_capture_alloc_capture_buffer(const struct fsm *fsm) +{ + assert(fsm != NULL); + const size_t len = fsm_capture_ceiling(fsm); + struct fsm_capture *res = f_malloc(fsm->opt->alloc, + len * sizeof(res[0])); + return res; +} + +void +fsm_capture_free_capture_buffer(const struct fsm *fsm, + struct fsm_capture *capture_buffer) +{ + assert(fsm != NULL); + f_free(fsm->opt->alloc, capture_buffer); +} + + int fsm_capture_has_captures(const struct fsm *fsm) { return fsm->capture_info - ? fsm->capture_info->buckets_used > 0 + ? fsm->capture_info->programs.used > 0 : 0; } -int -fsm_capture_has_capture_actions(const struct fsm *fsm, fsm_state_t state) +void +fsm_capture_dump_programs(FILE *f, const struct fsm *fsm) { - assert(state < fsm->statecount); - return fsm->states[state].has_capture_actions; + fprintf(f, "\n==== %s:\n", __func__); + struct fsm_capture_info *ci = fsm->capture_info; + for (uint32_t i = 0; i < ci->programs.used; i++) { + const struct capvm_program *p = ci->programs.set[i]; + fprintf(f, "# program %u, capture_count %u, base %u\n", + i, p->capture_count, p->capture_base); + fsm_capvm_program_dump(f, p); + fprintf(f, "\n"); + } } int -fsm_capture_set_path(struct fsm *fsm, unsigned capture_id, - fsm_state_t start, fsm_state_t end) +fsm_capture_set_active_for_end(struct fsm *fsm, + unsigned capture_id, fsm_state_t end_state) { - struct fsm_capture_info *ci; - struct capture_set_path_env env; - size_t seen_words; - int res = 0; - - assert(fsm != NULL); - assert(start < fsm->statecount); - assert(end < fsm->statecount); - - ci = fsm->capture_info; + struct fsm_capture_info *ci = fsm->capture_info; assert(ci != NULL); + struct idmap *m = ci->end_capture_map; + 
assert(m != NULL); - /* captures should no longer be stored as paths -- instead, set - * the info on the states _here_, and convert it as necessary. */ - -#if LOG_CAPTURE > 0 - fprintf(stderr, "fsm_capture_set_path: capture %u: <%u, %u>\n", - capture_id, start, end); -#endif - - if (capture_id > FSM_CAPTURE_MAX) { - return 0; /* ID out of range */ - } - - if (!init_capture_action_htab(fsm, ci)) { - return 0; - } - - /* This will create a trail and do a depth-first search from the - * start state, marking every unique path to the end state. */ - env.fsm = fsm; - env.capture_id = capture_id; - env.start = start; - env.end = end; - - env.trail_ceil = 0; - env.trail = NULL; - env.seen = NULL; + #if EXPENSIVE_CHECKS + assert(fsm_isend(fsm, end_state)); + #endif - env.trail = f_malloc(fsm->opt->alloc, - DEF_TRAIL_CEIL * sizeof(env.trail[0])); - if (env.trail == NULL) { - goto cleanup; - } - env.trail_ceil = DEF_TRAIL_CEIL; - - seen_words = fsm->statecount/64 + 1; - env.seen = f_malloc(fsm->opt->alloc, - seen_words * sizeof(env.seen[0])); + return idmap_set(m, end_state, capture_id); +} - if (!mark_capture_path(&env)) { - goto cleanup; - } +void +fsm_capture_iter_active_for_end_state(const struct fsm *fsm, fsm_state_t state, + fsm_capture_iter_active_for_end_cb *cb, void *opaque) +{ + /* These types should be the same. */ + idmap_iter_fun *idmap_cb = cb; + idmap_iter_for_state(fsm->capture_info->end_capture_map, state, + idmap_cb, opaque); +} - if (capture_id >= ci->max_capture_id) { - ci->max_capture_id = capture_id; - } +void +fsm_capture_iter_active_for_all_end_states(const struct fsm *fsm, + fsm_capture_iter_active_for_end_cb *cb, void *opaque) +{ + /* These types should be the same. 
*/ + idmap_iter_fun *idmap_cb = cb; + idmap_iter(fsm->capture_info->end_capture_map, + idmap_cb, opaque); +} - res = 1; - /* fall through */ +void +fsm_capture_iter_program_ids_for_end_state(const struct fsm *fsm, fsm_state_t state, + fsm_capture_iter_program_ids_for_end_state_cb *cb, void *opaque) +{ + /* These types should be the same. */ + idmap_iter_fun *idmap_cb = cb; + idmap_iter_for_state(fsm->capture_info->end_capvm_program_map, state, + idmap_cb, opaque); +} -cleanup: - f_free(fsm->opt->alloc, env.trail); - f_free(fsm->opt->alloc, env.seen); - return res; +void +fsm_capture_iter_program_ids_for_all_end_states(const struct fsm *fsm, + fsm_capture_iter_program_ids_for_end_state_cb *cb, void *opaque) +{ + /* These types should be the same. */ + idmap_iter_fun *idmap_cb = cb; + idmap_iter(fsm->capture_info->end_capvm_program_map, + idmap_cb, opaque); } static int -init_capture_action_htab(struct fsm *fsm, struct fsm_capture_info *ci) +dump_active_for_ends_cb(fsm_state_t state_id, unsigned value, void *opaque) { - size_t count, i; - assert(fsm != NULL); - assert(ci != NULL); + FILE *f = opaque; + fprintf(f, " -- state %d: value %u\n", state_id, value); + return 1; +} - if (ci->bucket_count > 0) { - assert(ci->buckets != NULL); - return 1; /* done */ - } +void +fsm_capture_dump_active_for_ends(FILE *f, const struct fsm *fsm) +{ + fprintf(f, "%s:\n", __func__); + idmap_iter(fsm->capture_info->end_capture_map, dump_active_for_ends_cb, f); +} - assert(ci->buckets == NULL); - assert(ci->buckets_used == 0); +void +fsm_capture_dump_program_end_mapping(FILE *f, const struct fsm *fsm) +{ + fprintf(f, "%s:\n", __func__); + idmap_iter(fsm->capture_info->end_capvm_program_map, dump_active_for_ends_cb, f); +} - count = DEF_CAPTURE_ACTION_BUCKET_COUNT; - ci->buckets = f_malloc(fsm->opt->alloc, - count * sizeof(ci->buckets[0])); - if (ci->buckets == NULL) { - return 0; - } +/* Dump capture metadata about an FSM. 
*/ +void +fsm_capture_dump(FILE *f, const char *tag, const struct fsm *fsm) +{ + struct fsm_capture_info *ci; - /* Init buckets to CAPTURE_NO_STATE -> empty. */ - for (i = 0; i < count; i++) { - ci->buckets[i].state = CAPTURE_NO_STATE; + assert(fsm != NULL); + ci = fsm->capture_info; + if (ci == NULL) { + fprintf(f, "==== %s -- no captures\n", tag); + return; } - ci->bucket_count = count; - return 1; + fsm_endid_dump(f, fsm); + fsm_capture_dump_active_for_ends(f, fsm); + fsm_capture_dump_programs(f, fsm); + fsm_capture_dump_program_end_mapping(f, fsm); } +struct carry_active_captures_env { + fsm_state_t dst; + struct idmap *dst_m; + int ok; +}; + static int -mark_capture_path(struct capture_set_path_env *env) +copy_active_captures_cb(fsm_state_t state_id, unsigned value, void *opaque) { - const size_t seen_words = env->fsm->statecount/64 + 1; - -#if LOG_CAPTURE > 0 - fprintf(stderr, "mark_capture_path: path [id %u, %u - %u]\n", - env->capture_id, env->start, env->end); -#endif + (void)state_id; - if (env->start == env->end) { - struct fsm_capture_action action; - action.type = CAPTURE_ACTION_COMMIT_ZERO_STEP; - action.id = env->capture_id; - action.to = CAPTURE_NO_STATE; - if (!add_capture_action(env->fsm, env->fsm->capture_info, - env->start, &action)) { - return 0; - } - return 1; - } - - memset(env->seen, 0x00, - seen_words * sizeof(env->seen[0])); - - /* initialize to starting node */ - env->trail_i = 1; - env->trail[0].state = env->start; - env->trail[0].step = TRAIL_STEP_START; - env->trail[0].has_self_edge = 0; - - while (env->trail_i > 0) { - const enum trail_step step = env->trail[env->trail_i - 1].step; -#if LOG_CAPTURE > 0 - fprintf(stderr, "mark_capture_path: trail %u/%u, cur %u, step %d\n", - env->trail_i, env->trail_ceil, - env->trail[env->trail_i - 1].state, - step); -#endif - - switch (step) { - case TRAIL_STEP_START: - if (!step_trail_start(env)) { - return 0; - } - break; - case TRAIL_STEP_ITER_EDGES: - if (!step_trail_iter_edges(env)) { - return 
0; - } - break; - case TRAIL_STEP_ITER_EPSILONS: - if (!step_trail_iter_epsilons(env)) { - return 0; - } - break; - case TRAIL_STEP_DONE: - if (!step_trail_done(env)) { - return 0; - } - break; - default: - assert(!"match fail"); - } + struct carry_active_captures_env *env = opaque; + if (!idmap_set(env->dst_m, env->dst, value)) { + env->ok = false; + return 0; } - return 1; } static int -cmp_action(const struct fsm_capture_action *a, - const struct fsm_capture_action *b) { - /* could use memcmp here, provided padding is always zeroed. */ - return a->id < b->id ? -1 - : a->id > b->id ? 1 - : a->type < b->type ? -1 - : a->type > b->type ? 1 - : a->to < b->to ? -1 - : a->to > b->to ? 1 - : 0; -} - -int -fsm_capture_add_action(struct fsm *fsm, - fsm_state_t state, enum capture_action_type type, - unsigned id, fsm_state_t to) +copy_program_associations_cb(fsm_state_t state_id, unsigned value, void *opaque) { - struct fsm_capture_action action; - assert(fsm->capture_info != NULL); + (void)state_id; - action.type = type; - action.id = id; - action.to = to; - return add_capture_action(fsm, fsm->capture_info, - state, &action); + struct carry_active_captures_env *env = opaque; + if (!idmap_set(env->dst_m, env->dst, value)) { + env->ok = false; + return 0; + } + return 1; } -static int -add_capture_action(struct fsm *fsm, struct fsm_capture_info *ci, - fsm_state_t state, const struct fsm_capture_action *action) +int +fsm_capture_copy_active_for_ends(const struct fsm *src_fsm, + const struct state_set *states, + struct fsm *dst_fsm, fsm_state_t dst_state) { - uint64_t h; - size_t b_i, mask; - - assert(state < fsm->statecount); - assert(action->to == CAPTURE_NO_STATE || action->to < fsm->statecount); - -#if LOG_CAPTURE > 0 - fprintf(stderr, "add_capture_action: state %u, type %s, ID %u, TO %d\n", - state, fsm_capture_action_type_name[action->type], - action->id, action->to); -#endif - - if (ci->bucket_count == 0) { - if (!init_capture_action_htab(fsm, ci)) { - return 0; - } - 
} else if (ci->buckets_used >= ci->bucket_count/2) { /* grow */ - if (!grow_capture_action_buckets(fsm->opt->alloc, ci)) { - return 0; + struct state_iter it; + fsm_state_t s; + + assert(src_fsm != NULL); + assert(src_fsm->capture_info != NULL); + assert(src_fsm->capture_info->end_capture_map != NULL); + assert(dst_fsm != NULL); + assert(dst_fsm->capture_info != NULL); + assert(dst_fsm->capture_info->end_capture_map != NULL); + struct idmap *src_m = src_fsm->capture_info->end_capture_map; + struct idmap *dst_m = dst_fsm->capture_info->end_capture_map; + + struct carry_active_captures_env env = { + .dst_m = dst_m, + .dst = dst_state, + .ok = true, + }; + + state_set_reset(states, &it); + while (state_set_next(&it, &s)) { + if (!fsm_isend(src_fsm, s)) { + continue; } - } - h = hash_id(state); - mask = ci->bucket_count - 1; - - for (b_i = 0; b_i < ci->bucket_count; b_i++) { - struct fsm_capture_action_bucket *b = &ci->buckets[(h + b_i) & mask]; - if (b->state == CAPTURE_NO_STATE) { /* empty */ - b->state = state; - memcpy(&b->action, action, sizeof(*action)); - ci->buckets_used++; - fsm->states[state].has_capture_actions = 1; - if (action->id > ci->max_capture_id) { - ci->max_capture_id = action->id; - } - return 1; - } else if (b->state == state && - 0 == cmp_action(action, &b->action)) { - /* already present, ignore duplicate */ - assert(fsm->states[state].has_capture_actions); - assert(ci->max_capture_id >= action->id); - return 1; - } else { - continue; /* skip past collision */ + idmap_iter_for_state(src_m, s, copy_active_captures_cb, &env); + if (!env.ok) { + goto cleanup; } } - assert(!"unreachable"); - return 0; +cleanup: + return env.ok; } -static int -grow_capture_action_buckets(const struct fsm_alloc *alloc, - struct fsm_capture_info *ci) +int +fsm_capture_copy_program_end_state_associations(const struct fsm *src_fsm, + const struct state_set *states, + struct fsm *dst_fsm, fsm_state_t dst_state) { - const size_t ncount = 2 * ci->bucket_count; - struct 
fsm_capture_action_bucket *nbuckets; - size_t nused = 0; - size_t i; + struct state_iter it; + fsm_state_t s; + + assert(src_fsm != NULL); + assert(src_fsm->capture_info != NULL); + assert(src_fsm->capture_info->end_capvm_program_map != NULL); + assert(dst_fsm != NULL); + assert(dst_fsm->capture_info != NULL); + assert(dst_fsm->capture_info->end_capvm_program_map != NULL); + struct idmap *src_m = src_fsm->capture_info->end_capvm_program_map; + struct idmap *dst_m = dst_fsm->capture_info->end_capvm_program_map; + + struct carry_active_captures_env env = { + .dst_m = dst_m, + .dst = dst_state, + .ok = true, + }; + + state_set_reset(states, &it); + while (state_set_next(&it, &s)) { + if (!fsm_isend(src_fsm, s)) { + continue; + } - assert(ncount != 0); - nbuckets = f_malloc(alloc, ncount * sizeof(nbuckets[0])); - if (nbuckets == NULL) { - return 0; - } + LOG(5 - LOG_CAPTURE_COMBINING_ANALYSIS, + "%s: dst_state %d, state_set_next => %d\n", + __func__, dst_state, s); - for (i = 0; i < ncount; i++) { - nbuckets[i].state = CAPTURE_NO_STATE; + idmap_iter_for_state(src_m, s, copy_program_associations_cb, &env); + if (!env.ok) { + goto cleanup; + } } - for (i = 0; i < ci->bucket_count; i++) { - const struct fsm_capture_action_bucket *src_b = &ci->buckets[i]; - uint64_t h; - const size_t mask = ncount - 1; - size_t b_i; +cleanup: + return env.ok; +} - if (src_b->state == CAPTURE_NO_STATE) { - continue; +int +fsm_capture_copy_programs(const struct fsm *src_fsm, + struct fsm *dst_fsm) +{ + const struct fsm_alloc *alloc = src_fsm->opt->alloc; + assert(alloc == dst_fsm->opt->alloc); + const struct fsm_capture_info *src_ci = src_fsm->capture_info; + + for (uint32_t p_i = 0; p_i < src_ci->programs.used; p_i++) { + const struct capvm_program *p = src_ci->programs.set[p_i]; + struct capvm_program *cp = capvm_program_copy(alloc, p); + if (cp == NULL) { + return 0; } - h = hash_id(src_b->state); - for (b_i = 0; b_i < ncount; b_i++) { - struct fsm_capture_action_bucket *dst_b; - dst_b = 
&nbuckets[(h + b_i) & mask]; - if (dst_b->state == CAPTURE_NO_STATE) { - memcpy(dst_b, src_b, sizeof(*src_b)); - nused++; - break; - } else { - continue; - } + /* unused: because this is an in-order copy, it's assumed + * the programs will retain their order. */ + uint32_t prog_id; + if (!fsm_capture_add_program(dst_fsm, cp, &prog_id)) { + return 0; } } - - assert(nused == ci->buckets_used); - f_free(alloc, ci->buckets); - ci->buckets = nbuckets; - ci->bucket_count = ncount; return 1; } -static int -grow_trail(struct capture_set_path_env *env) +size_t +fsm_capture_program_count(const struct fsm *fsm) { - struct trail_cell *ntrail; - unsigned nceil; - assert(env != NULL); - - nceil = 2 * env->trail_ceil; - assert(nceil > env->trail_ceil); + return fsm->capture_info->programs.used; +} - ntrail = f_realloc(env->fsm->opt->alloc, env->trail, - nceil * sizeof(env->trail[0])); - if (ntrail == NULL) { - return 0; - } +struct check_program_mappings_env { + const struct fsm *fsm; +}; - env->trail = ntrail; - env->trail_ceil = nceil; +static int +check_program_mappings_cb(fsm_state_t state_id, unsigned value, void *opaque) +{ + const uint32_t prog_id = (uint32_t)value; + struct check_program_mappings_env *env = opaque; + assert(state_id < env->fsm->statecount); + assert(prog_id < env->fsm->capture_info->programs.used); return 1; } -static int -step_trail_start(struct capture_set_path_env *env) -{ - struct trail_cell *tc = &env->trail[env->trail_i - 1]; - const fsm_state_t cur = tc->state; - size_t i; - struct edge_set *edge_set = NULL; - - /* check if node is endpoint, if so mark trail, - * then pop trail and continue */ - if (cur == env->end) { - struct fsm_capture_action action; -#if LOG_CAPTURE > 0 - fprintf(stderr, " -- GOT END at %u\n", cur); -#endif - action.id = env->capture_id; +void +fsm_capture_integrity_check(const struct fsm *fsm) +{ + if (!EXPENSIVE_CHECKS) { return; } - for (i = 0; i < env->trail_i; i++) { - fsm_state_t state = env->trail[i].state; -#if 
LOG_CAPTURE > 0 - fprintf(stderr, " -- %lu: %d\n", - i, state); -#endif + /* check that all program mappings are in range */ + struct check_program_mappings_env env = { + .fsm = fsm, + }; + idmap_iter(fsm->capture_info->end_capvm_program_map, check_program_mappings_cb, &env); +} - /* Special case: if this is marked as having - * a self-edge on the path, then also add an - * extend for that. */ - if (env->trail[i].has_self_edge) { - struct fsm_capture_action self_action; - self_action.type = CAPTURE_ACTION_EXTEND; - self_action.id = env->capture_id; - self_action.to = state; - - if (!add_capture_action(env->fsm, - env->fsm->capture_info, - state, &self_action)) { - return 0; - } - } - - - if (i == 0) { - action.type = CAPTURE_ACTION_START; - } else { - action.type = (i < env->trail_i - 1 - ? CAPTURE_ACTION_EXTEND - : CAPTURE_ACTION_COMMIT); - } - - if (i < env->trail_i - 1) { - action.to = env->trail[i + 1].state; - } else { - action.to = CAPTURE_NO_STATE; - } - - if (!add_capture_action(env->fsm, - env->fsm->capture_info, - state, &action)) { - return 0; - } - } +struct capture_idmap_compact_env { + int ok; + struct idmap *dst; + const fsm_state_t *mapping; + size_t orig_statecount; +}; - tc->step = TRAIL_STEP_DONE; - return 1; - } +static int +copy_with_mapping_cb(fsm_state_t state_id, unsigned value, void *opaque) +{ + fsm_state_t dst_id; + struct capture_idmap_compact_env *env = opaque; -#if LOG_CAPTURE > 0 - fprintf(stderr, " -- resetting edge iterator\n"); -#endif - edge_set = env->fsm->states[cur].edges; + assert(state_id < env->orig_statecount); + dst_id = env->mapping[state_id]; - MARK_SEEN(env, cur); -#if LOG_CAPTURE > 0 - fprintf(stderr, " -- marking %u as seen\n", cur); -#endif + if (dst_id == FSM_STATE_REMAP_NO_STATE) { + return 1; /* discard */ + } + + if (!idmap_set(env->dst, dst_id, value)) { + env->ok = 0; + return 0; + } - edge_set_reset(edge_set, &tc->iter); - tc->step = TRAIL_STEP_ITER_EDGES; return 1; } -static int -step_trail_iter_edges(struct 
capture_set_path_env *env) +int +fsm_capture_id_compact(struct fsm *fsm, const fsm_state_t *mapping, + size_t orig_statecount) { - struct trail_cell *tc = &env->trail[env->trail_i - 1]; - struct trail_cell *next_tc = NULL; - - struct fsm_edge e; + struct capture_idmap_compact_env env; + struct idmap *old_idmap = fsm->capture_info->end_capture_map; + struct idmap *new_idmap = idmap_new(fsm->opt->alloc); - if (!edge_set_next(&tc->iter, &e)) { -#if LOG_CAPTURE > 0 - fprintf(stderr, " -- ITER_EDGE_NEXT: DONE %u\n", tc->state); -#endif - tc->step = TRAIL_STEP_ITER_EPSILONS; - return 1; + if (new_idmap == NULL) { + return 0; } -#if LOG_CAPTURE > 0 - fprintf(stderr, " -- ITER_EDGE_NEXT: %u -- NEXT %u\n", - tc->state, e.state); -#endif + env.ok = 1; + env.dst = new_idmap; + env.mapping = mapping; + env.orig_statecount = orig_statecount; - if (tc->state == e.state) { -#if LOG_CAPTURE > 0 - fprintf(stderr, " -- special case, self-edge\n"); -#endif - /* Mark this state as having a self-edge, then continue - * the iterator. An EXTEND action will be added for the - * self-edge later, if necessary. 
*/ - tc->has_self_edge = 1; - return 1; - } else if (CHECK_SEEN(env, e.state)) { -#if LOG_CAPTURE > 0 - fprintf(stderr, " -- seen, skipping\n"); -#endif - return 1; /* continue */ - } - - if (env->trail_i == env->trail_ceil) { - if (!grow_trail(env)) { - return 0; - } + idmap_iter(old_idmap, copy_with_mapping_cb, &env); + if (!env.ok) { + idmap_free(new_idmap); + return 0; } -#if LOG_CAPTURE > 0 - fprintf(stderr, " -- marking %u as seen\n", e.state); -#endif - MARK_SEEN(env, e.state); + idmap_free(old_idmap); + fsm->capture_info->end_capture_map = new_idmap; -#if LOG_CAPTURE > 0 - fprintf(stderr, " -- not seen (%u), exploring\n", e.state); -#endif - env->trail_i++; - next_tc = &env->trail[env->trail_i - 1]; - next_tc->state = e.state; - next_tc->step = TRAIL_STEP_START; - next_tc->has_self_edge = 0; return 1; } -static int -step_trail_iter_epsilons(struct capture_set_path_env *env) +int +fsm_capture_program_association_compact(struct fsm *fsm, const fsm_state_t *mapping, + size_t orig_statecount) { - struct trail_cell *tc = &env->trail[env->trail_i - 1]; + struct capture_idmap_compact_env env; + struct idmap *old_idmap = fsm->capture_info->end_capvm_program_map; + struct idmap *new_idmap = idmap_new(fsm->opt->alloc); - /* skipping this for now */ - -#if LOG_CAPTURE > 0 - fprintf(stderr, " -- ITER_EPSILONS: %u\n", tc->state); -#endif - - tc->step = TRAIL_STEP_DONE; - return 1; -} + if (new_idmap == NULL) { + return 0; + } -static int -step_trail_done(struct capture_set_path_env *env) -{ - struct trail_cell *tc; + env.ok = 1; + env.dst = new_idmap; + env.mapping = mapping; + env.orig_statecount = orig_statecount; - /* 0-step paths already handled outside loop */ - assert(env->trail_i > 0); + idmap_iter(old_idmap, copy_with_mapping_cb, &env); + if (!env.ok) { + idmap_free(new_idmap); + return 0; + } - tc = &env->trail[env->trail_i - 1]; -#if LOG_CAPTURE > 0 - fprintf(stderr, " -- DONE: %u\n", tc->state); -#endif - CLEAR_SEEN(env, tc->state); + idmap_free(old_idmap); + 
fsm->capture_info->end_capvm_program_map = new_idmap; - env->trail_i--; return 1; } void -fsm_capture_rebase_capture_id(struct fsm *fsm, unsigned base) +fsm_capture_update_max_capture_id(struct fsm_capture_info *ci, + unsigned capture_id) { - size_t i; - struct fsm_capture_info *ci = fsm->capture_info; assert(ci != NULL); - - for (i = 0; i < ci->bucket_count; i++) { - struct fsm_capture_action_bucket *b = &ci->buckets[i]; - if (b->state == CAPTURE_NO_STATE) { - continue; - } - - b->action.id += base; - if (b->action.id > ci->max_capture_id) { - ci->max_capture_id = b->action.id; - } + if (capture_id >= ci->max_capture_id) { + ci->max_capture_id = capture_id; } } -void -fsm_capture_rebase_capture_action_states(struct fsm *fsm, fsm_state_t base) +int +fsm_capture_add_program(struct fsm *fsm, + struct capvm_program *program, uint32_t *prog_id) { - size_t i; + assert(program != NULL); + assert(prog_id != NULL); + struct fsm_capture_info *ci = fsm->capture_info; - assert(ci != NULL); - for (i = 0; i < ci->bucket_count; i++) { - struct fsm_capture_action_bucket *b = &ci->buckets[i]; - if (b->state == CAPTURE_NO_STATE) { - continue; + if (ci->programs.used == ci->programs.ceil) { + const size_t nceil = (ci->programs.ceil == 0 + ? 
DEF_PROGRAMS_CEIL + : 2*ci->programs.ceil); + assert(nceil > ci->programs.ceil); + struct capvm_program **nset = f_realloc(fsm->opt->alloc, + ci->programs.set, nceil * sizeof(nset[0])); + if (nset == NULL) { + return 0; } - b->state += base; - if (b->action.to != CAPTURE_NO_STATE) { - b->action.to += base; - } + ci->programs.ceil = nceil; + ci->programs.set = nset; } -} + assert(ci->programs.used < ci->programs.ceil); -struct fsm_capture * -fsm_capture_alloc(const struct fsm *fsm) -{ - (void)fsm; - assert(!"todo"); - return NULL; -} - -void -fsm_capture_update_captures(const struct fsm *fsm, - fsm_state_t cur_state, fsm_state_t next_state, size_t offset, - struct fsm_capture *captures) -{ - const struct fsm_capture_info *ci; - uint64_t h; - size_t b_i, mask; - - assert(cur_state < fsm->statecount); - assert(fsm->states[cur_state].has_capture_actions); - - ci = fsm->capture_info; - assert(ci != NULL); - - h = hash_id(cur_state); - mask = ci->bucket_count - 1; - -#if LOG_CAPTURE > 0 - fprintf(stderr, "-- updating captures at state %u, to %d, offset %lu\n", - cur_state, next_state, offset); -#endif - - for (b_i = 0; b_i < ci->bucket_count; b_i++) { - const size_t b_id = (h + b_i) & mask; - struct fsm_capture_action_bucket *b = &ci->buckets[b_id]; - unsigned capture_id; - -#if LOG_CAPTURE > 3 - fprintf(stderr, " -- update_captures: bucket %lu, state %d\n", b_id, b->state); -#endif - - - if (b->state == CAPTURE_NO_STATE) { -#if LOG_CAPTURE > 3 - fprintf(stderr, " -- no more actions for this state\n"); -#endif - break; /* no more for this state */ - } else if (b->state != cur_state) { - continue; /* skip collision */ - } - - assert(b->state == cur_state); - capture_id = b->action.id; - - switch (b->action.type) { - case CAPTURE_ACTION_START: -#if LOG_CAPTURE > 0 - fprintf(stderr, "START [%u, %u]\n", - b->action.id, b->action.to); -#endif - if (next_state == b->action.to && captures[capture_id].pos[0] == FSM_CAPTURE_NO_POS) { - captures[capture_id].pos[0] = offset; -#if 
LOG_CAPTURE > 0 - fprintf(stderr, " -- set capture[%u].[0] to %lu\n", b->action.id, offset); -#endif - } else { - /* filtered, ignore */ - } - break; - case CAPTURE_ACTION_EXTEND: -#if LOG_CAPTURE > 0 - fprintf(stderr, "EXTEND [%u, %u]\n", - b->action.id, b->action.to); -#endif - if (captures[capture_id].pos[0] != FSM_CAPTURE_NO_POS - && (0 == (captures[capture_id].pos[1] & COMMITTED_CAPTURE_FLAG))) { - if (next_state == b->action.to) { - captures[capture_id].pos[1] = offset; -#if LOG_CAPTURE > 0 - fprintf(stderr, " -- set capture[%u].[1] to %lu\n", b->action.id, offset); -#endif - } else { - /* filtered, ignore */ - } - } - break; - case CAPTURE_ACTION_COMMIT_ZERO_STEP: -#if LOG_CAPTURE > 0 - fprintf(stderr, "COMMIT_ZERO_STEP [%u]\n", - b->action.id); -#endif - - if (captures[capture_id].pos[0] == FSM_CAPTURE_NO_POS) { - captures[capture_id].pos[0] = offset; - captures[capture_id].pos[1] = offset | COMMITTED_CAPTURE_FLAG; - } else { /* extend */ - captures[capture_id].pos[1] = offset | COMMITTED_CAPTURE_FLAG; - } - -#if LOG_CAPTURE > 0 - fprintf(stderr, " -- set capture[%u].[0] and [1] to %lu (with COMMIT flag)\n", b->action.id, offset); -#endif - break; - case CAPTURE_ACTION_COMMIT: -#if LOG_CAPTURE > 0 - fprintf(stderr, "COMMIT [%u]\n", - b->action.id); -#endif - captures[capture_id].pos[1] = offset | COMMITTED_CAPTURE_FLAG; -#if LOG_CAPTURE > 0 - fprintf(stderr, " -- set capture[%u].[1] to %lu (with COMMIT flag)\n", b->action.id, offset); -#endif - break; - default: - assert(!"matchfail"); - } + const unsigned max_prog_capture_id = fsm_capvm_program_get_max_capture_id(program); + if (max_prog_capture_id > ci->max_capture_id) { + fsm_capture_update_max_capture_id(ci, max_prog_capture_id); } -} -void -fsm_capture_finalize_captures(const struct fsm *fsm, - size_t capture_count, struct fsm_capture *captures) -{ - size_t i; - - /* If either pos[] is FSM_CAPTURE_NO_POS or the - * COMMITTED_CAPTURE_FLAG isn't set on pos[1], then the capture - * wasn't finalized; clear 
it. Otherwise, clear that bit so the - * pos[1] offset is meaningful. */ - - /* FIXME: this should also take the end state(s) associated - * with a capture into account, when that information is available; - * otherwise there will be false positives for zero-width captures - * where the paths have a common prefix. */ - (void)fsm; - - for (i = 0; i < capture_count; i++) { -#if LOG_CAPTURE > 1 - fprintf(stderr, "finalize[%lu]: pos[0]: %ld, pos[1]: %ld\n", - i, captures[i].pos[0], captures[i].pos[1]); -#endif + *prog_id = ci->programs.used; + ci->programs.set[ci->programs.used] = program; + ci->programs.used++; + return 1; +} - if (captures[i].pos[0] == FSM_CAPTURE_NO_POS - || captures[i].pos[1] == FSM_CAPTURE_NO_POS - || (0 == (captures[i].pos[1] & COMMITTED_CAPTURE_FLAG))) { - captures[i].pos[0] = FSM_CAPTURE_NO_POS; - captures[i].pos[1] = FSM_CAPTURE_NO_POS; -#if LOG_CAPTURE > 1 - fprintf(stderr, "finalize: discard %lu\n", i); -#endif - } else if (captures[i].pos[1] & COMMITTED_CAPTURE_FLAG) { - captures[i].pos[1] &=~ COMMITTED_CAPTURE_FLAG; - } +const struct capvm_program * +fsm_capture_get_program_by_id(const struct fsm *fsm, uint32_t prog_id) +{ + struct fsm_capture_info *ci = fsm->capture_info; + if (prog_id >= ci->programs.used) { + return NULL; } + return ci->programs.set[prog_id]; } -void -fsm_capture_action_iter(const struct fsm *fsm, - fsm_capture_action_iter_cb *cb, void *opaque) +int +fsm_capture_associate_program_with_end_state(struct fsm *fsm, + uint32_t prog_id, fsm_state_t end_state) { - size_t i; struct fsm_capture_info *ci = fsm->capture_info; - assert(ci != NULL); - - for (i = 0; i < ci->bucket_count; i++) { - struct fsm_capture_action_bucket *b = &ci->buckets[i]; - if (b->state == CAPTURE_NO_STATE) { - continue; - } + assert(end_state < fsm->statecount); + assert(prog_id < ci->programs.used); - if (!cb(b->state, b->action.type, - b->action.id, b->action.to, opaque)) { - break; - } + if (!idmap_set(ci->end_capvm_program_map, end_state, prog_id)) { 
+ return 0; } + return 1; } -const char *fsm_capture_action_type_name[] = { - "START", "EXTEND", - "COMMIT_ZERO_STEP", "COMMIT" +struct capture_resolve_env { + const struct fsm_capture_info *ci; + const unsigned char *input; + const size_t length; + + int res; + struct fsm_capture *captures; + size_t captures_len; }; static int -dump_iter_cb(fsm_state_t state, - enum capture_action_type type, unsigned capture_id, fsm_state_t to, - void *opaque) +exec_capvm_program_cb(fsm_state_t state_id, unsigned prog_id, void *opaque) { - FILE *f = opaque; - fprintf(f, " - state %u, %s [capture_id: %u, to: %d]\n", - state, fsm_capture_action_type_name[type], capture_id, to); + struct capture_resolve_env *env = opaque; + (void)state_id; + + /* TODO: idmap_iter could take a halt return value */ + if (env->res != 1) { return 0; } + + assert(prog_id < env->ci->programs.used); + struct capvm_program *p = env->ci->programs.set[prog_id]; + + LOG(5 - LOG_EVAL, "%s: evaluating prog_id %u for state %d\n", + __func__, prog_id, state_id); + +#define EXEC_COUNT 1 /* can be increased for benchmarking */ + + for (size_t i = 0; i < EXEC_COUNT; i++) { + const enum fsm_capvm_program_exec_res exec_res = + fsm_capvm_program_exec(p, + (const uint8_t *)env->input, env->length, + env->captures, env->captures_len); + if (exec_res != FSM_CAPVM_PROGRAM_EXEC_SOLUTION_WRITTEN) { + env->res = 0; + return 0; + } + } return 1; } -/* Dump capture metadata about an FSM. 
*/ -void -fsm_capture_dump(FILE *f, const char *tag, const struct fsm *fsm) +int +fsm_capture_resolve_during_exec(const struct fsm *fsm, + fsm_state_t end_state, const unsigned char *input, size_t input_offset, + struct fsm_capture *captures, size_t captures_len) { - struct fsm_capture_info *ci; - assert(fsm != NULL); - ci = fsm->capture_info; - if (ci == NULL || ci->bucket_count == 0) { - fprintf(f, "==== %s -- no captures\n", tag); - return; - } - - fprintf(f, "==== %s -- capture action hash table (%u buckets)\n", - tag, ci->bucket_count); - fsm_capture_action_iter(fsm, dump_iter_cb, f); + assert(input != NULL); + assert(captures != NULL); + + const struct fsm_capture_info *ci = fsm->capture_info; + + struct capture_resolve_env capture_env = { + .res = 1, + .ci = ci, + .input = input, + .length = input_offset, + .captures = captures, + .captures_len = captures_len, + }; + + LOG(5 - LOG_EVAL, "%s: ended on state %d\n", + __func__, end_state); + idmap_iter_for_state(ci->end_capvm_program_map, + end_state, exec_capvm_program_cb, &capture_env); + + return capture_env.res; } diff --git a/src/libfsm/capture.h b/src/libfsm/capture.h index 4c0ba4722..16588060c 100644 --- a/src/libfsm/capture.h +++ b/src/libfsm/capture.h @@ -2,28 +2,26 @@ #define LIBFSM_CAPTURE_H #include +#include #include #include +#include -#define NEXT_STATE_END ((fsm_state_t)-1) - +/* Internal state IDs that are out of range for valid state IDs. + * + * CAPTURE_NO_STATE is used to represent the absence of a state, such as + * when remapping a state to a dead state (removing it) or empty + * hash table buckets. + * + * NEXT_STATE_END is used as a destination for capture actions that + * trigger when ending on a state. */ #define CAPTURE_NO_STATE ((fsm_state_t)-1) /* Capture interface -- functions internal to libfsm. * The public interface should not depend on any of these details. */ -enum capture_action_type { - /* Start an active capture if transitioning to TO. 
*/ - CAPTURE_ACTION_START, - /* Continue an active capture if transitioning to TO, - * otherwise deactivate it. */ - CAPTURE_ACTION_EXTEND, - /* Write a zero-step capture (i.e., the start and - * end state are the same). */ - CAPTURE_ACTION_COMMIT_ZERO_STEP, - /* Write an active capture's endpoints. */ - CAPTURE_ACTION_COMMIT -}; +struct fsm_capture_info; +struct capvm_program; int fsm_capture_init(struct fsm *fsm); @@ -31,41 +29,110 @@ fsm_capture_init(struct fsm *fsm); void fsm_capture_free(struct fsm *fsm); +void +fsm_capture_dump_active_for_ends(FILE *f, const struct fsm *fsm); + +void +fsm_capture_dump_program_end_mapping(FILE *f, const struct fsm *fsm); + /* Does the FSM have captures? */ int fsm_capture_has_captures(const struct fsm *fsm); -/* Update captures, called when exiting or ending on a state. - * If ending on a state, use NEXT_STATE_END for next_state. */ void -fsm_capture_update_captures(const struct fsm *fsm, - fsm_state_t cur_state, fsm_state_t next_state, size_t offset, - struct fsm_capture *captures); +fsm_capture_dump(FILE *f, const char *tag, const struct fsm *fsm); + +void +fsm_capture_dump_programs(FILE *f, const struct fsm *fsm); +/* If EXPENSIVE_CHECKS is non-zero, assert that all capture metadata on + * an FSM is internally consistent. */ void -fsm_capture_finalize_captures(const struct fsm *fsm, - size_t capture_count, struct fsm_capture *captures); +fsm_capture_integrity_check(const struct fsm *fsm); + +int +fsm_capture_id_compact(struct fsm *fsm, const fsm_state_t *mapping, + size_t orig_statecount); -/* Add a capture action. This is used to update capture actions - * in the destination FSM when combining/transforming other FSMs. */ int -fsm_capture_add_action(struct fsm *fsm, - fsm_state_t state, enum capture_action_type type, - unsigned id, fsm_state_t to); - -/* Callback for iterating over capture actions. - * Return 1 to continue, return 0 to halt. 
- * If TO is not meaningful for a particular type, it will be - * set to NEXT_STATE_END. */ +fsm_capture_program_association_compact(struct fsm *fsm, const fsm_state_t *mapping, + size_t orig_statecount); + +/* Iterator callback for capture IDs that are active for a particular + * end state. Returns whether iteration should continue. */ typedef int -fsm_capture_action_iter_cb(fsm_state_t state, - enum capture_action_type type, unsigned capture_id, fsm_state_t to, +fsm_capture_iter_active_for_end_cb(fsm_state_t state, unsigned capture_id, void *opaque); void -fsm_capture_action_iter(const struct fsm *fsm, - fsm_capture_action_iter_cb *cb, void *opaque); +fsm_capture_iter_active_for_end_state(const struct fsm *fsm, fsm_state_t state, + fsm_capture_iter_active_for_end_cb *cb, void *opaque); + +void +fsm_capture_iter_active_for_all_end_states(const struct fsm *fsm, + fsm_capture_iter_active_for_end_cb *cb, void *opaque); -extern const char *fsm_capture_action_type_name[]; +/* Iterator callback for program IDs that are active for a particular + * end state. Returns whether iteration should continue. 
*/ +typedef int +fsm_capture_iter_program_ids_for_end_state_cb(fsm_state_t state, unsigned prog_id, + void *opaque); + +void +fsm_capture_iter_program_ids_for_end_state(const struct fsm *fsm, fsm_state_t state, + fsm_capture_iter_program_ids_for_end_state_cb *cb, void *opaque); +void +fsm_capture_iter_program_ids_for_all_end_states(const struct fsm *fsm, + fsm_capture_iter_program_ids_for_end_state_cb *cb, void *opaque); + +/* TODO: combine/rename */ +int +fsm_capture_copy_active_for_ends(const struct fsm *src_fsm, + const struct state_set *states, + struct fsm *dst_fsm, fsm_state_t dst_state); +int +fsm_capture_copy_program_end_state_associations(const struct fsm *src_fsm, + const struct state_set *states, + struct fsm *dst_fsm, fsm_state_t dst_state); + +int +fsm_capture_copy_programs(const struct fsm *src_fsm, + struct fsm *dst_fsm); + +size_t +fsm_capture_program_count(const struct fsm *fsm); + +void +fsm_capture_update_max_capture_id(struct fsm_capture_info *ci, + unsigned capture_id); + +int +fsm_capture_add_program(struct fsm *fsm, + struct capvm_program *program, uint32_t *prog_id); + +const struct capvm_program * +fsm_capture_get_program_by_id(const struct fsm *fsm, uint32_t prog_id); + +int +fsm_capture_associate_program_with_end_state(struct fsm *fsm, + uint32_t prog_id, fsm_state_t end_state); + +/* Resolve captures. + * + * FIXME: With the current implementation, if enough memory + * was passed in then it couldn't fail, but it may be worth + * changing the interface so that it doesn't assume there was + * already a successful match in order to support one-pass + * matching & capture resolution attempts from a stream. + * + * TODO: This should pass in a size for captures[]. + * TODO: An alternate interface that allows passing in + * preallocated buffers for working memory. + * + * TODO: describe return value. 
*/ +int +fsm_capture_resolve_during_exec(const struct fsm *fsm, + fsm_state_t end_state, const unsigned char *input, size_t input_offset, + struct fsm_capture *captures, size_t captures_len); #endif diff --git a/src/libfsm/capture_internal.h b/src/libfsm/capture_internal.h deleted file mode 100644 index 70418b988..000000000 --- a/src/libfsm/capture_internal.h +++ /dev/null @@ -1,115 +0,0 @@ -#ifndef CAPTURE_INTERNAL_H -#define CAPTURE_INTERNAL_H - -#include -#include - -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include "internal.h" -#include "capture.h" - -/* Bucket count for capture action hash table. - * Must be a power of 2. */ - -#define DEF_CAPTURE_ACTION_BUCKET_COUNT 32 -#define DEF_TRAIL_CEIL 8 - -#define LOG_CAPTURE 0 - -/* Most significant bit of a size_t. */ -#define COMMITTED_CAPTURE_FLAG ((SIZE_MAX) ^ (SIZE_MAX >> 1)) - -struct fsm_capture_info { - unsigned max_capture_id; - - /* Add-only hash table. */ - unsigned bucket_count; - unsigned buckets_used; /* grow if >= 1/2 used */ - - /* Hash buckets. If state is CAPTURE_NO_STATE, - * the bucket is empty. */ - struct fsm_capture_action_bucket { - fsm_state_t state; /* key */ - struct fsm_capture_action { - enum capture_action_type type; - unsigned id; - /* only used by START and EXTEND */ - fsm_state_t to; - } action; - } *buckets; -}; - -enum trail_step { - TRAIL_STEP_START, - TRAIL_STEP_ITER_EDGES, - TRAIL_STEP_ITER_EPSILONS, - TRAIL_STEP_DONE -}; - -/* env->seen is used as a bit set for tracking which states have already - * been processed. These macros set/check/clear the bits. 
*/ -#define SEEN_BITOP(ENV, STATE, OP) ENV->seen[STATE/64] OP ((uint64_t)1 << (STATE&63)) -#define MARK_SEEN(ENV, STATE) SEEN_BITOP(ENV, STATE, |=) -#define CHECK_SEEN(ENV, STATE) SEEN_BITOP(ENV, STATE, &) -#define CLEAR_SEEN(ENV, STATE) SEEN_BITOP(ENV, STATE, &=~) - -struct capture_set_path_env { - struct fsm *fsm; - unsigned capture_id; - fsm_state_t start; - fsm_state_t end; - - unsigned trail_i; - unsigned trail_ceil; - struct trail_cell { - fsm_state_t state; - enum trail_step step; - char has_self_edge; - struct edge_iter iter; - } *trail; - - /* bitset for which states have already been seen. */ - uint64_t *seen; -}; - -static int -init_capture_action_htab(struct fsm *fsm, struct fsm_capture_info *ci); - -static int -mark_capture_path(struct capture_set_path_env *env); - -static int -add_capture_action(struct fsm *fsm, struct fsm_capture_info *ci, - fsm_state_t state, const struct fsm_capture_action *action); - -static int -grow_capture_action_buckets(const struct fsm_alloc *alloc, - struct fsm_capture_info *ci); - -static int -grow_trail(struct capture_set_path_env *env); - -static int -step_trail_start(struct capture_set_path_env *env); -static int -step_trail_iter_edges(struct capture_set_path_env *env); -static int -step_trail_iter_epsilons(struct capture_set_path_env *env); -static int -step_trail_done(struct capture_set_path_env *env); - -static int -cmp_action(const struct fsm_capture_action *a, - const struct fsm_capture_action *b); - -#endif diff --git a/src/libfsm/capture_log.h b/src/libfsm/capture_log.h new file mode 100644 index 000000000..c850460bb --- /dev/null +++ b/src/libfsm/capture_log.h @@ -0,0 +1,27 @@ +/* + * Copyright 2020 Scott Vokes + * + * See LICENCE for the full copyright terms. 
+ */ + +#ifndef CAPTURE_LOG_H +#define CAPTURE_LOG_H + +/* Log levels */ +#define LOG_CAPTURE 0 +#define LOG_CAPTURE_COMBINING_ANALYSIS 0 +#define LOG_EVAL 0 +#define LOG_APPEND_ACTION 0 +#define LOG_PRINT_FSM 0 +#define LOG_MARK_PATH 0 + +#include + +#define LOG(LEVEL, ...) \ + do { \ + if ((LEVEL) <= LOG_CAPTURE) { \ + fprintf(stderr, __VA_ARGS__); \ + } \ + } while(0) + +#endif diff --git a/src/libfsm/capture_vm.c b/src/libfsm/capture_vm.c new file mode 100644 index 000000000..e6a1f0539 --- /dev/null +++ b/src/libfsm/capture_vm.c @@ -0,0 +1,194 @@ +/* + * Copyright 2022 Scott Vokes + * + * See LICENCE for the full copyright terms. + */ + +/* Virtual machine for resolving captures while executing regular + * expressions from a subset of PCRE. This is based on the approach + * described in Russ Cox's "Regular Expression Matching: the Virtual + * Machine Approach" (https://swtch.com/~rsc/regexp/regexp2.html), but + * has a couple major modifications, mainly to keep memory usage low and + * predictable, and to be more consistent (arguably, bug-compatible...) + * with PCRE's behavior for libfsm's supported subset of PCRE. + * + * Instead of giving each green thread its own copy of the capture + * buffers, which uses a prohibitive amount of memory when combining DFAs + * with several captures each, operate in two passes: + * + * In the first pass, each thread keeps track of its execution path, + * appending a bit for each branch: 1 for the greedy option, 0 for the + * non-greedy. Since there can be at most one live thread per program + * instruction, and all of them are either on the current or next input + * character, there's a bounded window for diverging paths during execution. + * After a certain distance back all paths either have a common prefix + * or consist entirely of 0 bits (for continually looping at an unanchored + * start). 
The path bits are stored in chunks in a backwards linked list, + * so nodes for common path prefixes can be shared by multiple threads, + * and the prefix of all 0 bits is instead stored as a counter. This + * keeps memory usage substantially lower. This search runs threads in + * parallel, breadth-first, halting any threads that duplicate work of + * a greedier search path (since PCRE's results match the greediest). + * + * In the second pass, replay the execution path for just the single + * greediest thread, which represents the "correct" match (according to + * PCRE semantics), and write capture offsets into buffers passed in by + * the caller. + * + * Most of the other differences have to do with matching PCRE + * edge cases, particularly interactions between newlines and start/end + * anchors. */ + +#include "capture_vm.h" +#include "capture_vm_program.h" + +#include + +#include +#include +#include + +void +fsm_capvm_program_free(const struct fsm_alloc *alloc, + struct capvm_program *program) +{ + if (program == NULL) { return; } + f_free(alloc, program->ops); + f_free(alloc, program->char_classes.sets); + f_free(alloc, program); +} + +struct capvm_program * +capvm_program_copy(const struct fsm_alloc *alloc, + const struct capvm_program *src) +{ + assert(src != NULL); + struct capvm_program *p = NULL; + struct capvm_opcode *ops = NULL; + struct capvm_char_class *sets = NULL; + + p = f_calloc(alloc, 1, sizeof(*p)); + if (p == NULL) { goto cleanup; } + + /* This allocates exactly as many instructions and char_classes + * as necessary, rather than a power-of-2 buffer, because + * they are only added during compilation in libre. */ + + ops = f_calloc(alloc, src->used, sizeof(ops[0])); + if (ops == NULL) { goto cleanup; } + + sets = f_calloc(alloc, + /* do non-zero allocation to silence EFENCE */ + src->char_classes.count == 0 ? 
1 : src->char_classes.count, + sizeof(src->char_classes.sets[0])); + if (sets == NULL) { goto cleanup; } + + memcpy(ops, src->ops, src->used * sizeof(src->ops[0])); + + assert(src->char_classes.sets != NULL || src->char_classes.count == 0); + if (src->char_classes.count > 0) { + memcpy(sets, src->char_classes.sets, + src->char_classes.count * sizeof(src->char_classes.sets[0])); + } + + struct capvm_program np = { + .capture_count = src->capture_count, + .capture_base = src->capture_base, + + .used = src->used, + .ceil = src->used, + .ops = ops, + + .char_classes = { + .count = src->char_classes.count, + .ceil = src->char_classes.count, + .sets = sets, + }, + }; + memcpy(p, &np, sizeof(np)); + return p; + +cleanup: + f_free(alloc, p); + f_free(alloc, ops); + f_free(alloc, sets); + return NULL; +} + +void +capvm_program_rebase(struct capvm_program *program, unsigned capture_offset) +{ + assert(program->capture_base + capture_offset > program->capture_base); + program->capture_base += capture_offset; +} + +void +fsm_capvm_program_dump(FILE *f, + const struct capvm_program *p) +{ + for (size_t i = 0; i < p->used; i++) { + const struct capvm_opcode *op = &p->ops[i]; + switch (op->t) { + case CAPVM_OP_CHAR: + fprintf(f, "%zu: char 0x%02x (%c)\n", + i, op->u.chr, isprint(op->u.chr) ? 
op->u.chr : '.'); + break; + case CAPVM_OP_CHARCLASS: + { + const uint32_t id = op->u.charclass_id; + assert(id < p->char_classes.count); + const struct capvm_char_class *cc = &p->char_classes.sets[id]; + fprintf(f, "%zu: charclass %u -> [", i, id); + for (size_t i = 0; i < 4; i++) { + fprintf(f, "%016lx", cc->octets[i]); + } + fprintf(f, "]\n"); + break; + } + case CAPVM_OP_MATCH: + fprintf(f, "%zu: match\n", i); + break; + case CAPVM_OP_JMP: + fprintf(f, "%zu: jmp %u\n", i, op->u.jmp); + break; + case CAPVM_OP_JMP_ONCE: + fprintf(f, "%zu: jmp_once %u\n", i, op->u.jmp_once); + break; + case CAPVM_OP_SPLIT: + fprintf(f, "%zu: split greedy %u nongreedy %u\n", i, op->u.split.greedy, op->u.split.nongreedy); + break; + case CAPVM_OP_SAVE: + fprintf(f, "%zu: save %u (cap %u, %s)\n", + i, op->u.save, + op->u.save / 2, (op->u.save & (uint32_t)0x01) ? "end" : "start"); + break; + case CAPVM_OP_ANCHOR: + fprintf(f, "%zu: anchor %s\n", i, + op->u.anchor == CAPVM_ANCHOR_START ? "start" : "end"); + break; + default: + assert(!"matchfail"); + } + } + for (size_t i = 0; i < p->char_classes.count; i++) { + const uint64_t *octets = p->char_classes.sets[i].octets; + fprintf(f, "char_class %zu: 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n", + i, octets[0], octets[1], octets[2], octets[3]); + } +} + +unsigned +fsm_capvm_program_get_capture_count(const struct capvm_program *program) +{ + assert(program != NULL); + return program->capture_count; +} + +unsigned +fsm_capvm_program_get_max_capture_id(const struct capvm_program *program) +{ + assert(program != NULL); + return (program->capture_count == 0 + ? 0 + : program->capture_base + program->capture_count - 1); +} diff --git a/src/libfsm/capture_vm.h b/src/libfsm/capture_vm.h new file mode 100644 index 000000000..02c198dab --- /dev/null +++ b/src/libfsm/capture_vm.h @@ -0,0 +1,68 @@ +/* + * Copyright 2022 Scott Vokes + * + * See LICENCE for the full copyright terms. 
+ */ + +#ifndef CAPTURE_VM_H +#define CAPTURE_VM_H + +#include +#include + +#include +#include + +/* Interface for the virtual machine used to resolve captures. + * These interfaces are exposed to libre but should not be + * used directly. */ + +/* Opaque struct, details in capture_vm_program.h. */ +struct capvm_program; + +void +fsm_capvm_program_free(const struct fsm_alloc *alloc, + struct capvm_program *program); + +struct capvm_program * +capvm_program_copy(const struct fsm_alloc *alloc, + const struct capvm_program *program); + +/* Add an offset to the capture ID base for a program. + * Used when FSMs are merged, one of the source FSMs' capture IDs + * will be shifted to appear after the others. */ +void +capvm_program_rebase(struct capvm_program *program, unsigned capture_offset); + +void +fsm_capvm_program_dump(FILE *f, + const struct capvm_program *program); + +enum fsm_capvm_program_exec_res { + FSM_CAPVM_PROGRAM_EXEC_SOLUTION_WRITTEN, + FSM_CAPVM_PROGRAM_EXEC_NO_SOLUTION_FOUND, + FSM_CAPVM_PROGRAM_EXEC_STEP_LIMIT_REACHED, + FSM_CAPVM_PROGRAM_EXEC_ERROR_ALLOC = -1, +}; + +/* Execute a capture program with the given input and populate + * the capture buffer. + * + * This asserts that the capture buffer is at least as large as + * necessary. This is an internal interface, and the buffer size + * should have already been checked by the caller. */ +enum fsm_capvm_program_exec_res +fsm_capvm_program_exec(const struct capvm_program *program, + const uint8_t *input, size_t length, + struct fsm_capture *capture_buf, size_t capture_buf_length); + +/* Get the capture count from the program. */ +unsigned +fsm_capvm_program_get_capture_count(const struct capvm_program *program); + +/* Get the max capture ID from the program. + * If there are no captures (which is pointless) it will return 0. 
*/ +unsigned +fsm_capvm_program_get_max_capture_id(const struct capvm_program *program); + +#endif diff --git a/src/libfsm/capture_vm_exec.c b/src/libfsm/capture_vm_exec.c new file mode 100644 index 000000000..9d4be066a --- /dev/null +++ b/src/libfsm/capture_vm_exec.c @@ -0,0 +1,2076 @@ +/* + * Copyright 2022 Scott Vokes + * + * See LICENCE for the full copyright terms. + */ + +#include "capture_vm.h" +#include "capture_vm_program.h" +#include "capture_vm_log.h" + +#include +#include +#include +#include + +#include + +/* for EXPENSIVE_CHECKS and TRACK_TIMES */ +#include "internal.h" + +#if EXPENSIVE_CHECKS +#include +#endif + +/* Special handling for a path node that has a long prefix of all 0 + * bits, as is common when the regex is unanchored at the start. */ +#define USE_COLLAPSED_ZERO_PREFIX 1 + +/* Special out-of-range NONE values. */ +#define NO_POS ((uint32_t)-1) +#define NO_ID ((uint32_t)-1) +#define COLLAPSED_ZERO_PREFIX_ID ((uint32_t)-2) +#define NO_POS_SIZE_T ((size_t)-1) + +/* Max number of bits each path link can store. + * This value cannot be changed without reworking the data structures. */ +#define PATH_LINK_BITS 32 + +/* This enables extra debugging/testing output in an easily scraped format */ +#ifndef TESTING_OPTIONS +#define TESTING_OPTIONS 0 +#endif + +/* Write the solution to stdout (used for testing). */ +#define LOG_SOLUTION_TO_STDOUT (0 || TESTING_OPTIONS) + +/* Enable extra fields for debugging/performance tuning, most notably + * a 'uniq_id' field that helps to see the various execution paths. */ +#define CAPVM_STATS (0 || TESTING_OPTIONS) +#define CAPVM_PATH_STATS (0 && CAPVM_STATS) + +/* Allow the path table to grow on demand. + * In theory it should be possible to determine the worst case + * based on compile-time analysis and the input length; if an + * appropriately sized buffer was passed in capture resolution + * would not need dynamic allocation at all. 
*/ +#define ALLOW_PATH_TABLE_RESIZING 1 + +/* Set to non-zero to trap runaway path table growth */ +#define PATH_TABLE_CEIL_LIMIT 0 + +/* Specialized logging that can be scraped to reconstruct non-interleaved + * execution paths per thread. */ +#define LOG_EXECUTION 0 +#define LOG_EXECUTION_FILE stderr +#if LOG_EXECUTION + +#if CAPVM_STATS == 0 +#error CAPVM_STATS must be 1 for uniq_id +#endif + +/* Various execution log messages, in an easily scraped format */ +#define LOG_EXEC_OP(UNIQ_ID, INPUT_POS, OP_ID, OP_NAME) \ + fprintf(LOG_EXECUTION_FILE, \ + "LOG_EXEC OP %u %u %u %s\n", \ + UNIQ_ID, INPUT_POS, OP_ID, OP_NAME) + +#define LOG_EXEC_CHAR(UNIQ_ID, CHAR) \ + fprintf(LOG_EXECUTION_FILE, \ + "LOG_EXEC CHAR %u %c 0x%02x\n", UNIQ_ID, isprint(CHAR) ? CHAR : '.', CHAR) + +#define LOG_EXEC_HALT(UNIQ_ID) \ + fprintf(LOG_EXECUTION_FILE, \ + "LOG_EXEC HALT %u\n", UNIQ_ID) + +#define LOG_EXEC_PATH_FIND_SOLUTION(UNIQ_ID, BIT) \ + fprintf(LOG_EXECUTION_FILE, \ + "LOG_EXEC PATH_FIND_SOLUTION %u %u\n", UNIQ_ID, BIT) + +#define LOG_EXEC_PATH_SAVE_CAPTURES(UNIQ_ID, BIT) \ + fprintf(LOG_EXECUTION_FILE, \ + "LOG_EXEC PATH_SAVE_CAPTURES %u %u\n", UNIQ_ID, BIT) + +#define LOG_EXEC_SPLIT(PARENT_UNIQ_ID, CHILD_UNIQ_ID) \ + fprintf(LOG_EXECUTION_FILE, \ + "LOG_EXEC SPLIT %u %u\n", PARENT_UNIQ_ID, CHILD_UNIQ_ID) +#else +#define LOG_EXEC_OP(UNIQ_ID, INPUT_POS, OP_ID, OP_NAME) /* no-op */ +#define LOG_EXEC_CHAR(UNIQ_ID, CHAR) /* no-op */ +#define LOG_EXEC_HALT(UNIQ_ID) /* no-op */ +#define LOG_EXEC_PATH_FIND_SOLUTION(UNIQ_ID, BIT) /* no-op */ +#define LOG_EXEC_PATH_SAVE_CAPTURES(UNIQ_ID, BIT) /* no-op */ +#define LOG_EXEC_SPLIT(PARENT_UNIQ_ID, CHILD_UNIQ_ID) /* no-op */ +#endif + +/* Bitset backed by an array of 32-bit words */ +#define GET_BIT32(BITARRAY, BIT) (BITARRAY[BIT/32] & ((uint32_t)1 << (BIT & 31))) +#define SET_BIT32(BITARRAY, BIT) (BITARRAY[BIT/32] |= ((uint32_t)1 << (BIT & 31))) + +static const char * +op_name[] = { + [CAPVM_OP_CHAR] = "CHAR", + [CAPVM_OP_CHARCLASS] = 
"CHARCLASS", + [CAPVM_OP_MATCH] = "MATCH", + [CAPVM_OP_JMP] = "JMP", + [CAPVM_OP_JMP_ONCE] = "JMP_ONCE", + [CAPVM_OP_SPLIT] = "SPLIT", + [CAPVM_OP_SAVE] = "SAVE", + [CAPVM_OP_ANCHOR] = "ANCHOR", +}; + +enum pair_id { PAIR_ID_CURRENT = 0, PAIR_ID_NEXT = 1 }; + +struct capvm { + const struct capvm_program *p; + const uint8_t *input; + const uint32_t input_len; + struct fsm_capture *capture_buf; + const size_t capture_buf_length; + size_t step_limit; + +#if CAPVM_STATS + uint32_t uniq_id_counter; +#endif + + /* Two stacks, used to track which execution instruction should + * be advanced next. The current stack is + * run_stacks[PAIR_ID_CURRENT], run_stacks[PAIR_ID_NEXT] is the + * stack for the next input position, and when the current stack + * is completed the next stack is copied over (and reversed, so + * the greediest threads end up on top and resume first). + * Same with run_stacks_h, the height for each stack, and the + * other fields with [2] below. */ + uint32_t *run_stacks[2]; + uint32_t run_stacks_h[2]; + + /* Similarly, two columns of bits and two arrays of path_info + * node IDs and uniq_ids for the execution at a particular + * opcode. + * + * evaluated bit array[]: Has the instruction n already been + * evaluated at the current input position? */ + uint32_t *evaluated[2]; + uint32_t *path_info_heads[2]; /* path for thread on instruction */ +#if CAPVM_STATS + uint32_t *uniq_ids[2]; +#endif + + struct capvm_thread_stats { + uint32_t live; + uint32_t max_live; + } threads; + + /* Pool of nodes for linked lists of path segments. */ + struct capvm_path_info_pool { + uint32_t ceil; + uint32_t live; + uint32_t max_live; + uint32_t freelist_head; + struct capvm_path_info { + union { + struct capvm_path_freelist_link { + uint16_t refcount; /* == 0: tag for freelist node */ + uint32_t freelist; + } freelist_node; + struct capvm_path_info_link { + /* refcount: When > 0 this is a path node. 
+ * This could be sticky at UINT16_MAX, but in order + * to get there it would need a regex whose compiled + * program has well over 2**16 instructions that all + * share the same path info node. */ + uint16_t refcount; + uint8_t used; /* .bits used, <= PATH_LINK_BITS */ + uint32_t bits; /* buffer for this link's path bits */ + uint32_t offset; /* offset into the path bit array */ + /* Linked list to earlier path nodes, with common + * nodes shared until paths diverge. + * + * This can be either a valid path node ID, NO_ID + * for end of list, or COLLAPSED_ZERO_PREFIX_ID + * to indicate that the node is preceded by + * (offset) zero bits. */ + uint32_t backlink; +#if CAPVM_PATH_STATS + uint32_t bits_added_per_input_character; +#endif + } path; + } u; + } *pool; + } paths; + + struct capvm_solution_info { + uint32_t best_path_id; +#if CAPVM_STATS + uint32_t best_path_uniq_id; +#endif + uint32_t zeros_evaluated_up_to; + } solution; + + struct { + size_t steps; +#if CAPVM_STATS + uint32_t matches; + uint32_t path_prefixes_shared; + uint32_t collapsed_zero_prefixes; +#endif +#if CAPVM_PATH_STATS + uint32_t max_bits_added_per_input_character; + uint32_t max_path_length_memory; +#endif + } stats; + + enum fsm_capvm_program_exec_res res; +}; + +/* Type identifier macros */ +#define IS_THREAD_FREELIST(T) (T->u.thread.path_info_head == NO_ID) +#define IS_PATH_FREELIST(P) (P->u.path.refcount == 0) +#define IS_PATH_NODE(P) (P->u.path.refcount > 0 && P->u.path.used <= PATH_LINK_BITS) + +static void +release_path_info_link(struct capvm *vm, uint32_t *pi_id); + +static void +dump_path_table(FILE *f, const struct capvm *vm); + +static void +set_max_threads_live(struct capvm *vm, uint32_t new_max_live) +{ + vm->threads.max_live = new_max_live; + if (LOG_CAPVM >= 6) { + LOG(0, "==== new vm->threads.max_live: %u\n", vm->threads.max_live); + dump_path_table(stderr, vm); + } +} + + +/*********************** + * path_info functions * + ***********************/ + +static void 
+set_max_paths_live(struct capvm *vm) +{ + vm->paths.max_live = vm->paths.live; + if (LOG_CAPVM >= 6) { + LOG(0, "==== new vm->paths.max_live: %u\n", vm->paths.max_live); + dump_path_table(stderr, vm); + } +} + +static uint32_t +get_path_node_refcount(const struct capvm *vm, uint32_t p_id) +{ + assert(p_id < vm->paths.ceil); + const struct capvm_path_info *pi = &vm->paths.pool[p_id]; + if (IS_PATH_FREELIST(pi)) { + return pi->u.freelist_node.refcount; + } else { + assert(IS_PATH_NODE(pi)); + return pi->u.path.refcount; + } +} + +static void +inc_path_node_refcount(struct capvm *vm, uint32_t p_id) +{ + /* TODO: sticky refcount handling is not currently implemented */ + if (p_id == COLLAPSED_ZERO_PREFIX_ID) { return; } + assert(p_id < vm->paths.ceil); + struct capvm_path_info *pi = &vm->paths.pool[p_id]; + assert(IS_PATH_NODE(pi)); + LOG(5, "%s: p_id %u: refcnt %u -> %u\n", + __func__, p_id, pi->u.path.refcount, pi->u.path.refcount + 1); + pi->u.path.refcount++; +} + +static uint32_t +get_path_node_offset(const struct capvm *vm, uint32_t p_id) +{ + assert(p_id < vm->paths.ceil); + const struct capvm_path_info *pi = &vm->paths.pool[p_id]; + assert(IS_PATH_NODE(pi)); + return pi->u.path.offset; +} + +static uint32_t +get_path_node_backlink(const struct capvm *vm, uint32_t p_id) +{ + assert(p_id < vm->paths.ceil); + const struct capvm_path_info *pi = &vm->paths.pool[p_id]; + if (IS_PATH_FREELIST(pi)) { + return pi->u.freelist_node.freelist; + } else { + assert(IS_PATH_NODE(pi)); + return pi->u.path.backlink; + } +} + +static void +set_path_node_backlink(struct capvm *vm, uint32_t p_id, uint32_t backlink) +{ + assert(p_id < vm->paths.ceil); + assert(backlink < vm->paths.ceil || (backlink == NO_ID || backlink == COLLAPSED_ZERO_PREFIX_ID)); + struct capvm_path_info *pi = &vm->paths.pool[p_id]; + assert(IS_PATH_NODE(pi)); + pi->u.path.backlink = backlink; +} + +static void +dump_path_table(FILE *f, const struct capvm *vm) +{ + fprintf(f, "=== path table, %u/%u live\n", + 
vm->paths.live, vm->paths.ceil); + for (uint32_t i = 0; i < vm->paths.ceil; i++) { + struct capvm_path_info *pi = &vm->paths.pool[i]; + if (IS_PATH_FREELIST(pi)) { + if (LOG_CAPVM >= 5) { + fprintf(f, "paths[%u]: freelist -> %d\n", + i, (int)pi->u.freelist_node.freelist); + } + } else { + assert(IS_PATH_NODE(pi)); + fprintf(f, "paths[%u]: refcount %u, used %u, bits 0x%08x, offset %u, backlink %d%s\n", + i, pi->u.path.refcount, pi->u.path.used, pi->u.path.bits, + pi->u.path.offset, (int)pi->u.path.backlink, + pi->u.path.backlink == COLLAPSED_ZERO_PREFIX_ID + ? " (collapsed zero prefix)" + : pi->u.path.backlink == NO_ID + ? " (none)" + : " (link)"); + } + } +} + +static void +check_path_table(const struct capvm *vm) +{ +#if EXPENSIVE_CHECKS + uint32_t *refcounts = calloc(vm->paths.ceil, sizeof(refcounts[0])); + assert(refcounts); + + if (LOG_CAPVM >= 4) { + dump_path_table(stderr, vm); + } + + LOG(4, "%s: stack heights %u, %u\n", __func__, + vm->run_stacks_h[PAIR_ID_CURRENT], vm->run_stacks_h[PAIR_ID_NEXT]); + + for (uint32_t pair_id = 0; pair_id < 2; pair_id++) { + for (uint32_t h = 0; h < vm->run_stacks_h[pair_id]; h++) { + const uint32_t op_id = vm->run_stacks[pair_id][h]; + if (op_id == NO_ID) { continue; } +#if CAPVM_STATS + const uint32_t uniq_id = vm->uniq_ids[pair_id][op_id]; +#else + const uint32_t uniq_id = 0; +#endif + + LOG(4, "%s: run_stacks[%u][%u/%u]: op_id %u (uniq_id %u) -> path_info_head %u\n", + __func__, pair_id, h, vm->run_stacks_h[pair_id], op_id, + uniq_id, vm->path_info_heads[pair_id][op_id]); + if (op_id == NO_ID) { continue; } + const uint32_t p_id = vm->path_info_heads[pair_id][op_id]; + if (p_id != NO_ID) { + refcounts[p_id]++; + } + } + } + + for (uint32_t p_id = 0; p_id < vm->paths.ceil; p_id++) { + const struct capvm_path_info *pi = &vm->paths.pool[p_id]; + if (IS_PATH_FREELIST(pi)) { + continue; + } + const uint32_t backlink = get_path_node_backlink(vm, p_id); + if (backlink != NO_ID && backlink != COLLAPSED_ZERO_PREFIX_ID) { + 
assert(backlink < vm->paths.ceil); + refcounts[backlink]++; + } + } + + if (vm->solution.best_path_id != NO_ID) { + assert(vm->solution.best_path_id < vm->paths.ceil); + refcounts[vm->solution.best_path_id]++; + } + + for (uint32_t p_id = 0; p_id < vm->paths.ceil; p_id++) { + const struct capvm_path_info *pi = &vm->paths.pool[p_id]; + if (IS_PATH_FREELIST(pi)) { continue; } + bool ok; + const uint32_t refcount = get_path_node_refcount(vm, p_id); + ok = refcounts[p_id] == refcount; + + if (!ok) { + dump_path_table(stderr, vm); + + fprintf(stderr, "BAD REFCOUNT: pi[%u], expected %u, got %u\n", + p_id, refcounts[p_id], refcount); + assert(ok); + } + } + + free(refcounts); + LOG(6, "%s: passed\n", __func__); +#else + (void)vm; +#endif +} + +static bool +reserve_path_info_link(struct capvm *vm, uint32_t *pi_id) +{ + if (vm->paths.live == vm->paths.ceil) { +#if ALLOW_PATH_TABLE_RESIZING + if (LOG_CAPVM >= 4) { + fprintf(stderr, "\n"); + dump_path_table(stderr, vm); + check_path_table(vm); + fprintf(stderr, "\n"); + } + + const uint32_t nceil = 2*vm->paths.ceil; + LOG(1, "%s: growing path table %u -> %u\n", + __func__, vm->paths.ceil, nceil); + + /* This can legitimitely be reached with very long inputs, but + * if PATH_TABLE_CEIL_LIMIT is non-zero and this is hit then + * it's most likely a sign of an infinite loop. 
*/ + if (PATH_TABLE_CEIL_LIMIT != 0 && nceil > PATH_TABLE_CEIL_LIMIT) { + assert(!"reached PATH_TABLE_CEIL_LIMIT"); + } + + assert(nceil > vm->paths.ceil); + struct capvm_path_info *npool = realloc(vm->paths.pool, + nceil * sizeof(npool[0])); + if (npool == NULL) { + return false; + } + + for (size_t i = vm->paths.ceil; i < nceil; i++) { + npool[i].u.freelist_node.refcount = 0; + npool[i].u.freelist_node.freelist = i + 1; + } + npool[nceil - 1].u.freelist_node.refcount = 0; + npool[nceil - 1].u.freelist_node.freelist = NO_POS; + vm->paths.freelist_head = vm->paths.ceil; + vm->paths.ceil = nceil; + vm->paths.pool = npool; +#else + assert(!"shouldn't need to grow path pool"); +#endif + } + + assert(vm->paths.live < vm->paths.ceil); + assert(vm->paths.freelist_head != NO_POS); + + *pi_id = vm->paths.freelist_head; + LOG(3, "%s: returning %u\n", __func__, *pi_id); + return true; +} + +/* Release a reference to a path_info_link. Consume the argument. + * If the reference count reaches 0, repool the node and release + * its backlink. 
*/ +static void +release_path_info_link(struct capvm *vm, uint32_t *pi_id) +{ +#define LOG_RELEASE_PI 0 + size_t count = 0; + assert(pi_id != NULL); + uint32_t cur_id = *pi_id; + LOG(4 - LOG_RELEASE_PI, "%s: pi_id %u\n", __func__, cur_id); + *pi_id = NO_ID; + + while (cur_id != NO_ID) { + struct capvm_path_info *pi = &vm->paths.pool[cur_id]; + uint32_t refcount = get_path_node_refcount(vm, cur_id); + LOG(4 - LOG_RELEASE_PI, "-- checking path_info[%u]: refcount %u\n", + cur_id, refcount); + assert(refcount > 0); + LOG(4 - LOG_RELEASE_PI, "release: pi[%u] refcount %u -> %u\n", + cur_id, refcount, refcount - 1); + + const uint32_t backlink = get_path_node_backlink(vm, cur_id); + assert(IS_PATH_NODE(pi)); + pi->u.path.refcount--; + refcount = pi->u.path.refcount; + + if (refcount > 0) { + break; + } + + count++; + LOG(3 - LOG_RELEASE_PI, "-- repooling path_info %u, now %u live\n", + cur_id, vm->paths.live - 1); + LOG(3 - LOG_RELEASE_PI, "-- backlink: %d\n", backlink); + + pi->u.freelist_node.freelist = vm->paths.freelist_head; + vm->paths.freelist_head = cur_id; + assert(vm->paths.live > 0); + vm->paths.live--; + + cur_id = backlink; + if (cur_id == COLLAPSED_ZERO_PREFIX_ID) { + break; + } + } +} + +static void +print_path(FILE *f, const struct capvm *vm, uint32_t p_id) +{ + if (p_id == NO_ID) { + fprintf(f, "/0"); + return; + } + + /* reverse links to the root node */ + uint32_t zero_prefix = 0; + uint32_t next = NO_ID; + uint32_t first = NO_ID; + uint32_t prev; + + while (p_id != NO_ID) { + assert(p_id < vm->paths.ceil); + struct capvm_path_info *pi = &vm->paths.pool[p_id]; + assert(!IS_PATH_FREELIST(pi)); + + uint32_t bl; + assert(IS_PATH_NODE(pi)); + bl = pi->u.path.backlink; + pi->u.path.backlink = next; + + if (bl == NO_ID) { + prev = bl; + first = p_id; + break; + } else if (bl == COLLAPSED_ZERO_PREFIX_ID) { + prev = bl; + first = p_id; + zero_prefix = pi->u.path.offset; + break; + } + + next = p_id; + p_id = bl; + } + + if (zero_prefix > 0) { + fprintf(f, 
"0/%u", zero_prefix); + } + + /* iterate forward, printing and restoring link order */ + p_id = first; + while (p_id != NO_ID) { + assert(p_id < vm->paths.ceil); + struct capvm_path_info *pi = &vm->paths.pool[p_id]; + assert(!IS_PATH_FREELIST(pi)); + + uint32_t fl; /* now a forward link */ + assert(IS_PATH_NODE(pi)); + fl = pi->u.path.backlink; + pi->u.path.backlink = prev; + prev = p_id; + fprintf(f, "%s%08x/%u", prev == NO_ID ? "" : " ", + pi->u.path.bits, pi->u.path.used); + + p_id = fl; + } +} + +#if EXPENSIVE_CHECKS +SUPPRESS_EXPECTED_UNSIGNED_INTEGER_OVERFLOW() +#endif +static int +cmp_paths(struct capvm *vm, uint32_t p_a, uint32_t p_b) +{ +#if EXPENSIVE_CHECKS + /* When EXPENSIVE_CHECKS is set, walk the chains + * before and after and compare incremental hashing of node IDs, + * to ensure the chains are restored properly. */ + uint64_t hash_a_before = 0; + uint64_t hash_b_before = 0; +#endif + +#define LOG_CMP_PATHS 0 + LOG(3 - LOG_CMP_PATHS, "%s: p_a %d, p_b %d\n", __func__, p_a, p_b); + + if (p_a == NO_ID) { + return p_b == NO_ID ? 0 : -1; + } else if (p_b == NO_ID) { + return 1; + } + + assert(p_a != p_b); + + if (LOG_CAPVM >= 5) { + fprintf(stderr, "A: "); + print_path(stderr, vm, p_a); + fprintf(stderr, "\n"); + + fprintf(stderr, "B: "); + print_path(stderr, vm, p_b); + fprintf(stderr, "\n"); + } + + /* walk both paths backward until they reach a beginning + * or the common prefix node, reversing links along the + * way, then compare forward and restore link order. */ + uint32_t link_a = p_a; + uint32_t link_b = p_b; + + uint32_t fwd_a = NO_ID; + uint32_t fwd_b = NO_ID; + + /* Walk both paths backward, individually until reaching a + * common offset, then back until reaching a common prefix + * (including the start). While iterating backward, replace + * the .backlink field with a forward link, which will be + * reverted when iterating forward and comparing from the + * common prefix. 
*/ + bool common_prefix_found = false; + uint32_t first_a = NO_ID; + uint32_t first_b = NO_ID; + uint32_t common_prefix_link; /* can be NO_ID */ + +#if EXPENSIVE_CHECKS + uint32_t hash_step = 0; /* added so ordering matters */ + while (link_a != NO_ID) { + assert(link_a < vm->paths.ceil); + const uint32_t prev = get_path_node_backlink(vm, link_a); + hash_a_before += hash_id(link_a + hash_step); + link_a = prev; + hash_step++; + } + hash_step = 0; + while (link_b != NO_ID) { + assert(link_b < vm->paths.ceil); + const uint32_t prev = get_path_node_backlink(vm, link_b); + hash_b_before += hash_id(link_b + hash_step); + link_b = prev; + hash_step++; + } + + link_a = p_a; + link_b = p_b; +#endif + + while (!common_prefix_found) { + assert(link_a != NO_ID); + assert(link_b != NO_ID); + assert(link_a < vm->paths.ceil); + assert(link_b < vm->paths.ceil); + + const uint32_t prev_a = get_path_node_backlink(vm, link_a); + const uint32_t prev_b = get_path_node_backlink(vm, link_b); + const uint32_t offset_a = get_path_node_offset(vm, link_a); + const uint32_t offset_b = get_path_node_offset(vm, link_b); + const uint32_t backlink_a = get_path_node_backlink(vm, link_a); + const uint32_t backlink_b = get_path_node_backlink(vm, link_b); + + /* These are only used for logging, which may compile out. 
*/ + (void)backlink_a; + (void)backlink_b; + + LOG(3 - LOG_CMP_PATHS, + "%s: backward loop: link_a %d (offset %u, prev %d), link_b %d (offset %u, prev %d)\n", + __func__, link_a, offset_a, prev_a, link_b, offset_b, prev_b); + + assert((offset_a & (PATH_LINK_BITS - 1)) == 0); /* multiple of 32 */ + assert((offset_b & (PATH_LINK_BITS - 1)) == 0); /* multiple of 32 */ + if (offset_a > offset_b) { + LOG(3 - LOG_CMP_PATHS, "%s: backward loop: a longer than b\n", __func__); + set_path_node_backlink(vm, link_a, fwd_a); + fwd_a = link_a; + link_a = prev_a; + } else if (offset_b > offset_a) { + LOG(3 - LOG_CMP_PATHS, "%s: backward loop: b longer than a\n", __func__); + set_path_node_backlink(vm, link_b, fwd_b); + fwd_b = link_b; + link_b = prev_b; + } else { + assert(offset_b == offset_a); + LOG(3 - LOG_CMP_PATHS, "%s: backward loop: comparing backlinks: a: %d, b: %d\n", + __func__, backlink_a, backlink_b); + assert(fwd_a != link_a); + set_path_node_backlink(vm, link_a, fwd_a); + assert(fwd_b != link_b); + set_path_node_backlink(vm, link_b, fwd_b); + + if (prev_a == prev_b) { + /* if == NO_ID, empty prefix */ + common_prefix_found = true; + common_prefix_link = prev_a; + first_a = link_a; + first_b = link_b; + + LOG(3 - LOG_CMP_PATHS, "%s: backward loop: common_prefix_found: %d\n", + __func__, common_prefix_link); + } else { + fwd_a = link_a; + fwd_b = link_b; + + link_a = prev_a; + link_b = prev_b; + } + } + } + + assert(first_a != NO_ID); + assert(first_b != NO_ID); + link_a = first_a; + link_b = first_b; + + bool cmp_done = false; + int res; + bool done_restoring_link_order = false; + uint32_t prev_a = common_prefix_link; + uint32_t prev_b = common_prefix_link; + while (!done_restoring_link_order) { + LOG(3 - LOG_CMP_PATHS, + "%s: fwd loop, link_a %d, link_b %d, cmp_done %d\n", + __func__, link_a, link_b, cmp_done); + if (!cmp_done) { + if (link_a == NO_ID) { /* b is longer */ + cmp_done = true; + if (link_b == NO_ID) { + res = 0; + LOG(3 - LOG_CMP_PATHS, + "%s: fwd 
loop, equal length, res %d\n", __func__, res); + } else { + res = -1; + LOG(3 - LOG_CMP_PATHS, + "%s: fwd loop, b is longer, res %d\n", __func__, res); + } + } else if (link_b == NO_ID) { /* a is longer */ + cmp_done = true; + res = 1; + LOG(3 - LOG_CMP_PATHS, + "%s: fwd loop, a is longer, res %d\n", __func__, res); + } else { + assert(link_a < vm->paths.ceil); + assert(link_b < vm->paths.ceil); + struct capvm_path_info *pi_a = &vm->paths.pool[link_a]; + struct capvm_path_info *pi_b = &vm->paths.pool[link_b]; + + const uint32_t offset_a = get_path_node_offset(vm, link_a); + const uint32_t offset_b = get_path_node_offset(vm, link_b); + + assert(offset_a == offset_b); + + if (pi_a->u.path.bits != pi_b->u.path.bits) { + res = pi_a->u.path.bits < pi_b->u.path.bits ? -1 : 1; + cmp_done = true; + LOG(3 - LOG_CMP_PATHS, + "%s: fwd loop, different bits (0x%08x, 0x%08x) => res %d\n", + __func__, pi_a->u.path.bits, pi_b->u.path.bits, res); + } + } + } + + /* Check if both have reached the original head node. */ + if (link_a == NO_ID && link_b == NO_ID) { + done_restoring_link_order = true; + LOG(3 - LOG_CMP_PATHS, + "%s: fwd loop: reached end of both paths, prev_a %d (p_a %d), prev_b %d (p_b %d)\n", + __func__, prev_a, p_a, prev_b, p_b); + assert(prev_a == p_a); + assert(prev_b == p_b); + } + + /* Whether or not comparison has finished, iterate forward, + * restoring forward links. 
*/ + if (link_a != NO_ID) { + assert(link_a < vm->paths.ceil); + const uint32_t fwd_a = get_path_node_backlink(vm, link_a); + LOG(3 - LOG_CMP_PATHS, "%s: fwd loop: link_a %d, fwd_a %d\n", + __func__, link_a, fwd_a); + assert(fwd_a != link_a); + + LOG(3 - LOG_CMP_PATHS, + "%s: fwd loop, restoring a's backlink: pi[%u].backlink <- %d\n", + __func__, link_a, prev_a); + set_path_node_backlink(vm, link_a, prev_a); + prev_a = link_a; + link_a = fwd_a; + } + + if (link_b != NO_ID) { + assert(link_b < vm->paths.ceil); + const uint32_t fwd_b = get_path_node_backlink(vm, link_b); + LOG(3 - LOG_CMP_PATHS, "%s: fwd loop: link_b %d, fwd_b %d\n", + __func__, link_b, fwd_b); + assert(fwd_b != link_b); + + LOG(3 - LOG_CMP_PATHS, + "%s: fwd loop, restoring b's backlink: pi[%u].backlink <- %d\n", + __func__, link_b, prev_b); + set_path_node_backlink(vm, link_b, prev_b); + prev_b = link_b; + link_b = fwd_b; + } + } + + LOG(3 - LOG_CMP_PATHS, "%s: res %d\n", __func__, res); + +#if EXPENSIVE_CHECKS + uint64_t hash_a_after = 0; + uint64_t hash_b_after = 0; + hash_step = 0; + link_a = p_a; + while (link_a != NO_ID) { + assert(link_a < vm->paths.ceil); + const uint32_t prev = get_path_node_backlink(vm, link_a); + hash_a_after += hash_id(link_a + hash_step); + link_a = prev; + hash_step++; + } + link_b = p_b; + hash_step = 0; + while (link_b != NO_ID) { + assert(link_b < vm->paths.ceil); + const uint32_t prev = get_path_node_backlink(vm, link_b); + hash_b_after += hash_id(link_b + hash_step); + link_b = prev; + hash_step++; + } + + assert(hash_a_after == hash_a_before); + assert(hash_b_after == hash_b_before); +#endif + + return res; +#undef LOG_CMP_PATHS +} + +static void +handle_possible_matching_path(struct capvm *vm, uint32_t path_info_head, uint32_t uniq_id); + +static bool +copy_path_info(struct capvm *vm, uint32_t path_info_head, + uint32_t *new_path_info_head); + +static bool +extend_path_info(struct capvm *vm, uint32_t path_info_head, bool greedy, uint32_t uniq_id, + uint32_t 
*new_path_info_head); + +/* Push the next execution step onto the stack, if it hasn't already + * been run by a greedier path. Calling this hands off ownership of the + * path_info_head, so it is released if execution will not be resumed + * later. */ +static void +schedule_possible_next_step(struct capvm *vm, enum pair_id pair_id, + uint32_t input_pos, uint32_t op_id, + uint32_t path_info_head, uint32_t uniq_id) +{ + assert(path_info_head != NO_ID); + uint32_t *stack = vm->run_stacks[pair_id]; + uint32_t *stack_h = &vm->run_stacks_h[pair_id]; + assert((*stack_h) < vm->p->used); + + /* If that instruction has already been evaluated, skip the + * redundant execution by a less greedy path. */ + const uint32_t *evaluated = vm->evaluated[pair_id]; + const bool already_evaluated = GET_BIT32(evaluated, op_id) != 0; + LOG(3, "%s: pair_id %u, input_pos %u, op_id %u, path_info_head %u, uniq_id %u, already_evaluated %d, stack_h %u\n", + __func__, pair_id, input_pos, op_id, path_info_head, uniq_id, already_evaluated, *stack_h); + + if (already_evaluated) { + LOG_EXEC_HALT(uniq_id); + release_path_info_link(vm, &path_info_head); + assert(vm->threads.live > 0); + vm->threads.live--; + } else { + + /* If the work being scheduled by the current greediest + * thread pre-empts work scheduled by a less greedy + * thread, release that thread's path link and clear its + * op ID on the run stack. + * + * TODO: Ideally, avoid the linear scan here, but the + * total stack height is bounded by the generated program size + * and should be fairly small in practice. Wait to change this + * untill there are benchmarks in place showing it's necessary. + * + * An extra two bits per opcode (one for each stack) could + * be used to track whether the stack already contains + * op_id, so the linear scan could be avoided except when + * actually necessary. 
*/ + uint32_t cur_pih = vm->path_info_heads[pair_id][op_id]; + if (cur_pih != NO_ID) { + release_path_info_link(vm, &cur_pih); + vm->path_info_heads[pair_id][op_id] = NO_ID; + const size_t h = *stack_h; + for (size_t i = 0; i < h; i++) { + if (stack[i] == op_id) { + stack[i] = NO_ID; /* cancel thread */ + vm->threads.live--; + } + } + } + stack[(*stack_h)++] = op_id; + vm->path_info_heads[pair_id][op_id] = path_info_head; +#if CAPVM_STATS + vm->uniq_ids[pair_id][op_id] = uniq_id; +#endif + + if (*stack_h > vm->threads.max_live) { + vm->threads.max_live = *stack_h; + if (LOG_CAPVM >= 6) { + LOG(0, "==== new vm->threads.max_live: %u\n", vm->threads.max_live); + dump_path_table(stderr, vm); + } + } + } +} + +/* returns whether the vm should continue. */ +static bool +eval_vm_advance_greediest(struct capvm *vm, uint32_t input_pos, + uint32_t path_info_head, uint32_t uniq_id, uint32_t op_id) +{ + LOG(5, "%s: input_pos %u, input_len %u, op_id %u, threads_live %u\n", + __func__, input_pos, vm->input_len, op_id, vm->threads.live); + + assert(op_id < vm->p->used); + + if (vm->stats.steps == vm->step_limit) { + LOG(1, "%s: halting, steps == step_limit %zu\n", + __func__, vm->step_limit); + vm->res = FSM_CAPVM_PROGRAM_EXEC_STEP_LIMIT_REACHED; + return false; + } + vm->stats.steps++; + + const struct capvm_opcode *op = &vm->p->ops[op_id]; + LOG(2, "%s: op_id[%u]: input_pos %u, path_info_head %u, uniq_id %u, op %s\n", + __func__, op_id, input_pos, path_info_head, uniq_id, op_name[op->t]); + LOG_EXEC_OP(uniq_id, input_pos, op_id, op_name[op->t]); + + switch (op->t) { + case CAPVM_OP_CHAR: + if (input_pos == vm->input_len) { + goto halt_thread; /* past end of input */ + } + + LOG(3, "OP_CHAR: input_pos %u, exp char '%c', got '%c'\n", + input_pos, op->u.chr, vm->input[input_pos]); + + if (vm->input[input_pos] != op->u.chr) { + goto halt_thread; /* character mismatch */ + } + LOG_EXEC_CHAR(uniq_id, vm->input[input_pos]); + + schedule_possible_next_step(vm, PAIR_ID_NEXT, input_pos + 
1, op_id + 1, + path_info_head, uniq_id); + break; + + case CAPVM_OP_CHARCLASS: + { + if (input_pos == vm->input_len) { + goto halt_thread; /* past end of input */ + } + + const uint8_t c = vm->input[input_pos]; + const uint32_t cc_id = op->u.charclass_id; + assert(cc_id < vm->p->char_classes.count); + const struct capvm_char_class *class = &vm->p->char_classes.sets[cc_id]; + + if (!(class->octets[c/64] & ((uint64_t)1 << (c&63)))) { + goto halt_thread; /* character not in class */ + } + LOG_EXEC_CHAR(uniq_id, vm->input[input_pos]); + + schedule_possible_next_step(vm, PAIR_ID_NEXT, input_pos + 1, op_id + 1, + path_info_head, uniq_id); + break; + } + + case CAPVM_OP_MATCH: + if (input_pos == vm->input_len) { + handle_possible_matching_path(vm, path_info_head, uniq_id); + } else if (vm->input_len > 0 && input_pos == vm->input_len - 1 + && vm->input[input_pos] == '\n') { + LOG(3, "OP_MATCH: special case for trailing newline\n"); + handle_possible_matching_path(vm, path_info_head, uniq_id); + } + goto halt_thread; + + case CAPVM_OP_JMP: + schedule_possible_next_step(vm, PAIR_ID_CURRENT, input_pos, op->u.jmp, + path_info_head, uniq_id); + break; + + case CAPVM_OP_JMP_ONCE: + { + /* If the destination for this jump has already been visited + * without advancing input, then skip the jump. This is necessary + * for edge cases like the first branch in `^(^|.$)*`, which would + * otherwise have a backward jump to before the first case, due to + * the repetition, and would effectively be treated as an infinite + * loop and ignored, leading to incorrect match bounds for "x". + * + * Replaying the capture path does not track what has been evaluated, + * so this needs to record the branch in the path. This will make + * repetition more expensive in some cases, but compilation could + * emit a JMP when it's safe to do so. 
*/ + const bool greedy = GET_BIT32(vm->evaluated[PAIR_ID_CURRENT], op->u.jmp_once); + if (greedy) { + /* non-greedy branch -- fall through */ + uint32_t new_path_info_head = NO_ID; + if (!extend_path_info(vm, path_info_head, 0, uniq_id, &new_path_info_head)) { + release_path_info_link(vm, &path_info_head); + goto alloc_error; + } + schedule_possible_next_step(vm, PAIR_ID_CURRENT, input_pos, op_id + 1, + new_path_info_head, uniq_id); + } else { + /* greedy branch -- loop back and potentially match more */ + uint32_t new_path_info_head = NO_ID; + if (!extend_path_info(vm, path_info_head, 1, uniq_id, &new_path_info_head)) { + release_path_info_link(vm, &path_info_head); + goto alloc_error; + } + schedule_possible_next_step(vm, PAIR_ID_CURRENT, input_pos, op->u.jmp_once, + new_path_info_head, uniq_id); + } + break; + } + + case CAPVM_OP_SPLIT: + { + const uint32_t dst_greedy = op->u.split.greedy; + const uint32_t dst_nongreedy = op->u.split.nongreedy; + + /* destinations must be in range and not self-referential */ + assert(dst_greedy < vm->p->used); + assert(dst_nongreedy < vm->p->used); + assert(dst_greedy != op_id); + assert(dst_nongreedy != op_id); + + uint32_t nongreedy_path_info_head; + if (!copy_path_info(vm, path_info_head, &nongreedy_path_info_head)) { + goto alloc_error; + } + + if (!extend_path_info(vm, path_info_head, 1, uniq_id, &path_info_head)) { + release_path_info_link(vm, &path_info_head); + goto alloc_error; + } + + /* nongreedy is the non-greedy branch */ + if (!extend_path_info(vm, nongreedy_path_info_head, 0, uniq_id, &nongreedy_path_info_head)) { + release_path_info_link(vm, &path_info_head); + goto alloc_error; + } + +#if CAPVM_STATS + const uint32_t nongreedy_uniq_id = ++vm->uniq_id_counter; +#else + const uint32_t nongreedy_uniq_id = 0; +#endif + + vm->threads.live++; + if (vm->threads.live > vm->threads.max_live) { + set_max_threads_live(vm, vm->threads.live); + } + + /* Push the split.nongreedy destination, and then the + * split.greedy 
destination on top of it, so that the + * greedier branch will be fully evaluated first. */ + schedule_possible_next_step(vm, PAIR_ID_CURRENT, input_pos, dst_nongreedy, + nongreedy_path_info_head, nongreedy_uniq_id); + schedule_possible_next_step(vm, PAIR_ID_CURRENT, input_pos, dst_greedy, + path_info_head, uniq_id); + LOG_EXEC_SPLIT(uniq_id, nongreedy_uniq_id); + + break; + } + + case CAPVM_OP_SAVE: + /* no-op, during this stage */ + schedule_possible_next_step(vm, PAIR_ID_CURRENT, input_pos, op_id + 1, + path_info_head, uniq_id); + break; + + case CAPVM_OP_ANCHOR: + if (op->u.anchor == CAPVM_ANCHOR_START) { + LOG(3, "%s: ^ anchor\n", __func__); + /* ignore a single trailing newline, because PCRE does. + * For ^ this affects the capture position. */ + if (input_pos == 0 + && vm->input_len == 1 + && vm->input[0] == '\n') { + /* allowed */ + LOG(3, "%s: special case: ^ ignoring trailing newline\n", __func__); + schedule_possible_next_step(vm, PAIR_ID_CURRENT, input_pos, op_id + 1, + path_info_head, uniq_id); + break; + } else if (input_pos == 1 + && vm->input_len == 1 + && vm->input[0] == '\n') { + /* allowed */ + } else if (input_pos != 0) { goto halt_thread; } + } else { + assert(op->u.anchor == CAPVM_ANCHOR_END); + LOG(3, "%s: $ anchor: input_len %u, input_pos %u\n", + __func__, vm->input_len, input_pos); + + /* ignore a single trailing newline, because PCRE does */ + if (vm->input_len > 0 && input_pos == vm->input_len - 1) { + if (vm->input[input_pos] != '\n') { + goto halt_thread; + } + LOG(3, "%s: special case: $ allowing trailing newline\n", __func__); + schedule_possible_next_step(vm, PAIR_ID_CURRENT, input_pos, op_id + 1, + path_info_head, uniq_id); + break; + } else if (input_pos != vm->input_len) { + goto halt_thread; + } + } + + schedule_possible_next_step(vm, PAIR_ID_CURRENT, input_pos, op_id + 1, + path_info_head, uniq_id); + break; + + default: + assert(!"unreachable"); + return false; + } + + if (EXPENSIVE_CHECKS) { /* postcondition */ + 
check_path_table(vm); + } + + return true; + +halt_thread: + /* do not push further execution on the run stack */ + LOG_EXEC_HALT(uniq_id); + + release_path_info_link(vm, &path_info_head); + assert(vm->threads.live > 0); + vm->threads.live--; + return true; + +alloc_error: + release_path_info_link(vm, &path_info_head); + vm->res = FSM_CAPVM_PROGRAM_EXEC_ERROR_ALLOC; + return false; +} + +static void +handle_possible_matching_path(struct capvm *vm, uint32_t pi_id, uint32_t uniq_id) +{ + LOG(3, "\n%s: HIT, pi_id %u, uniq_id %u\n", __func__, pi_id, uniq_id); + + if (LOG_CAPVM >= 3) { + LOG(3, "--- current_live: %u, max_live: %u\n", + vm->threads.live, vm->threads.max_live); + dump_path_table(stderr, vm); + LOG(3, "=====\n"); + } + +#if CAPVM_STATS + vm->stats.matches++; +#endif + + assert(pi_id < vm->paths.ceil); + + if (vm->solution.best_path_id == NO_ID) { + struct capvm_path_info *pi = &vm->paths.pool[pi_id]; + assert(!IS_PATH_FREELIST(pi)); + if (LOG_CAPVM >= 5) { + const uint32_t refcount = get_path_node_refcount(vm, pi_id); + LOG(5, "MATCH: pi_id %u refcount %u -> %u\n", + pi_id, refcount, refcount + 1); + } + inc_path_node_refcount(vm, pi_id); + vm->solution.best_path_id = pi_id; +#if CAPVM_STATS + vm->solution.best_path_uniq_id = uniq_id; +#endif + LOG(3, "MATCH: saved current best solution path (pi_id %u)\n", pi_id); + } else { + /* Compare path info and only keep the path associated + * with the greediest match so far. 
*/ + const int res = cmp_paths(vm, pi_id, vm->solution.best_path_id); + if (res > 0) { + /* replace current best solution */ + struct capvm_path_info *pi = &vm->paths.pool[pi_id]; + assert(!IS_PATH_FREELIST(pi)); + if (LOG_CAPVM >= 5) { + const uint32_t refcount = get_path_node_refcount(vm, pi_id); + LOG(5, "MATCH: pi_id %u refcount %u -> %u\n", + pi_id, refcount, refcount + 1); + } + inc_path_node_refcount(vm, pi_id); + + LOG(3, "MATCH: replacing current best solution path %u with %u\n", + vm->solution.best_path_id, pi_id); + + release_path_info_link(vm, &vm->solution.best_path_id); + vm->solution.best_path_id = pi_id; +#if CAPVM_STATS + vm->solution.best_path_uniq_id = uniq_id; +#endif + } else { + /* keep the current best solution */ + LOG(3, "MATCH: ignoring new solution path %u, keeping %u\n", + pi_id, vm->solution.best_path_id); + } + } +} + +static bool +eval_vm(struct capvm *vm) +{ + uint32_t i_i; + + /* init the path_info_heads tables to NO_ID, except for cell 0 + * in next, which contains the starting point. 
*/ + for (size_t op_i = 0; op_i < vm->p->used; op_i++) { + vm->path_info_heads[PAIR_ID_CURRENT][op_i] = NO_ID; +#if CAPVM_STATS + vm->uniq_ids[PAIR_ID_CURRENT][op_i] = NO_ID; +#endif + } + for (size_t op_i = 1; op_i < vm->p->used; op_i++) { + vm->path_info_heads[PAIR_ID_NEXT][op_i] = NO_ID; +#if CAPVM_STATS + vm->uniq_ids[PAIR_ID_NEXT][op_i] = NO_ID; +#endif + } + + for (i_i = 0; i_i <= vm->input_len; i_i++) { + if (vm->threads.live == 0 + || vm->stats.steps == vm->step_limit) { + LOG(3, "%s: breaking, live %u, steps %zu/%zd\n", + __func__, vm->threads.live, vm->stats.steps, vm->step_limit); + break; + } + LOG(3, "\n###### i_i %u\n", i_i); + + LOG(4, "-- clearing evaluated\n"); + const size_t evaluated_bit_words = vm->p->used/32 + 1; + for (size_t i = 0; i < evaluated_bit_words; i++) { + vm->evaluated[PAIR_ID_CURRENT][i] = 0; + vm->evaluated[PAIR_ID_NEXT][i] = 0; + } + + uint32_t *stack_h = &vm->run_stacks_h[PAIR_ID_CURRENT]; + uint32_t *run_stack = vm->run_stacks[PAIR_ID_CURRENT]; + + /* Copy everything from the next run stack to the + * current. Copy in reverse, so items that were pushed + * earlier by greedier paths end up on the top of the + * stack and evalated first, preserving greedy + * ordering. 
*/ + { + const uint32_t next_stack_h = vm->run_stacks_h[PAIR_ID_NEXT]; + const uint32_t *next_stack = vm->run_stacks[PAIR_ID_NEXT]; + uint32_t *next_path_info_heads = vm->path_info_heads[PAIR_ID_NEXT]; + uint32_t *cur_path_info_heads = vm->path_info_heads[PAIR_ID_CURRENT]; + + uint32_t discarded = 0; + for (size_t i = 0; i < next_stack_h; i++) { + const uint32_t op_id = next_stack[i]; + if (op_id == NO_ID) { + assert(!"unreachable"); + discarded++; + continue; + } + + cur_path_info_heads[op_id] = next_path_info_heads[op_id]; + LOG(3, "%s: run_stack[%zd] <- %u, path_info_head %u\n", + __func__, i, op_id, cur_path_info_heads[op_id]); + assert(next_path_info_heads[op_id] < vm->paths.ceil); + next_path_info_heads[op_id] = NO_ID; /* move reference */ +#if CAPVM_STATS + vm->uniq_ids[PAIR_ID_CURRENT][op_id] = + vm->uniq_ids[PAIR_ID_NEXT][op_id]; +#endif + run_stack[next_stack_h - i - 1 - discarded] = op_id; + } + *stack_h = next_stack_h - discarded; + vm->run_stacks_h[PAIR_ID_NEXT] = 0; + +#if CAPVM_PATH_STATS + /* reset counters */ + for (size_t i = 0; i < vm->paths.ceil; i++) { + struct capvm_path_info *pi = &vm->paths.pool[i]; + if (IS_PATH_NODE(pi)) { + pi->u.path.bits_added_per_input_character = 0; + } + } +#endif + } + + uint32_t *path_info_heads = vm->path_info_heads[PAIR_ID_CURRENT]; + while (vm->run_stacks_h[PAIR_ID_CURRENT] > 0) { + /* Do this here, before popping, so that the reference + * on the stack can be counted properly. 
*/ + if (EXPENSIVE_CHECKS) { + check_path_table(vm); + } + + const uint32_t h = --(*stack_h); + assert(h < vm->p->used); + const uint32_t op_id = run_stack[h]; + LOG(4, "%s: popped op_id %d off stack\n", __func__, op_id); + if (op_id == NO_ID) { + LOG(4, "%s: ignoring halted pending execution\n", __func__); + continue; + } + assert(op_id < vm->p->used); + + if (GET_BIT32(vm->evaluated[PAIR_ID_CURRENT], op_id)) { + LOG(2, "%s: evaluated[current] already set for op_id %u (popped off stack), skipping\n", + __func__, op_id); + assert(!"unreachable"); + continue; + } + + LOG(4, "%s: setting evaluated[current] for op_id %u (popped off stack)\n", __func__, op_id); + SET_BIT32(vm->evaluated[PAIR_ID_CURRENT], op_id); + + const uint32_t path_info_head = path_info_heads[op_id]; + LOG(4, "%s: op_id %d's path_info_head: %d\n", __func__, op_id, path_info_head); + path_info_heads[op_id] = NO_ID; + + +#if CAPVM_STATS + const uint32_t uniq_id = vm->uniq_ids[PAIR_ID_CURRENT][op_id]; + assert(uniq_id != NO_ID); +#else + const uint32_t uniq_id = 0; +#endif + if (!eval_vm_advance_greediest(vm, i_i, path_info_head, uniq_id, op_id)) { + return false; + } + } + + +#if CAPVM_PATH_STATS + uint32_t max_path_bits_added = 0; + for (size_t i = 0; i < vm->paths.ceil; i++) { + const struct capvm_path_info *pi = &vm->paths.pool[i]; + if (IS_PATH_NODE(pi)) { + if (pi->u.path.bits_added_per_input_character > max_path_bits_added) { + max_path_bits_added = pi->u.path.bits_added_per_input_character; + } + } + } + LOG(2, "%s: input_i %u: max_path_bits_added: %u\n", + __func__, i_i, max_path_bits_added); + if (max_path_bits_added > vm->stats.max_bits_added_per_input_character) { + vm->stats.max_bits_added_per_input_character = max_path_bits_added; + } + + if (CAPVM_PATH_STATS > 1) { + dump_path_table(stderr, vm); + } +#endif + } + + return vm->solution.best_path_id != NO_ID; +} + +static bool +copy_path_info(struct capvm *vm, uint32_t path_info_head, + uint32_t *new_path_info_head) +{ + if 
(!reserve_path_info_link(vm, new_path_info_head)) { + return false; + } + + assert(path_info_head != NO_ID); + assert(path_info_head < vm->paths.ceil); + assert(*new_path_info_head < vm->paths.ceil); + assert(*new_path_info_head != path_info_head); + + /* Since this is the path head, it can never be a collapsed + * zero prefix node. */ + const struct capvm_path_info *pi = &vm->paths.pool[path_info_head]; + assert(IS_PATH_NODE(pi)); + + struct capvm_path_info *npi = &vm->paths.pool[*new_path_info_head]; + assert(IS_PATH_FREELIST(npi)); + + /* unlink from freelist */ + vm->paths.freelist_head = npi->u.freelist_node.freelist; + vm->paths.live++; + if (vm->paths.live > vm->paths.max_live) { + set_max_paths_live(vm); + } + + *npi = (struct capvm_path_info){ + .u.path = { + .refcount = 1, + .used = pi->u.path.used, + .bits = pi->u.path.bits, + .offset = pi->u.path.offset, + .backlink = pi->u.path.backlink, + } + }; + if (pi->u.path.backlink != NO_ID) { + inc_path_node_refcount(vm, pi->u.path.backlink); + } + return true; +} + +#if CAPVM_PATH_STATS +static void +update_max_path_length_memory(struct capvm *vm, const struct capvm_path_info *pi) +{ + const uint32_t len = pi->u.path.used + + (pi->u.path.backlink == COLLAPSED_ZERO_PREFIX_ID + ? 
0 /* not actually stored, so don't count it */ + : pi->u.path.offset); + + if (len > vm->stats.max_path_length_memory) { + vm->stats.max_path_length_memory = len; + } +} +#endif + +static bool +extend_path_info(struct capvm *vm, uint32_t pi_id, bool greedy, uint32_t uniq_id, + uint32_t *new_path_info_head) +{ + assert(pi_id < vm->paths.ceil); + struct capvm_path_info *pi = &vm->paths.pool[pi_id]; + assert(IS_PATH_NODE(pi)); + + (void)uniq_id; + LOG_EXEC_PATH_FIND_SOLUTION(uniq_id, greedy); + +#define LOG_EPI 0 + LOG(5 - LOG_EPI, "%s: pi_id %u, greedy %d\n", + __func__, pi_id, greedy); + + + if (pi->u.path.used == PATH_LINK_BITS) { /* full */ + uint32_t npi_id; + if (!reserve_path_info_link(vm, &npi_id)) { + assert(!"alloc fail"); + } + pi = &vm->paths.pool[pi_id]; /* refresh stale pointer */ + LOG(5 - LOG_EPI, "%s: new head at %u (%u is full)\n", __func__, npi_id, pi_id); + assert(npi_id < vm->paths.ceil); + struct capvm_path_info *npi = &vm->paths.pool[npi_id]; + vm->paths.freelist_head = npi->u.freelist_node.freelist; + vm->paths.live++; + if (vm->paths.live > vm->paths.max_live) { + set_max_paths_live(vm); + } + + LOG(5 - LOG_EPI, "%s: npi_id %u refcount 1 (new link)\n", + __func__, npi_id); + npi->u.path.refcount = 1; + npi->u.path.offset = pi->u.path.offset + pi->u.path.used; + npi->u.path.bits = (greedy ? ((uint32_t)1 << 31) : 0); + LOG(5 - LOG_EPI, "%s: bits after: 0x%08x\n", __func__, npi->u.path.bits); + npi->u.path.used = 1; + +#if CAPVM_PATH_STATS + npi->u.path.bits_added_per_input_character = pi->u.path.bits_added_per_input_character + 1; +#endif + + /* If the path node is full of zero bits and it's either at the start, + * or its backlink is a COLLAPSED_ZERO_PREFIX_ID, then extend the + * backlink to a collapsed run of zeroes. The node's offset field + * indicates the prefix length. Long prefixes of zero bits tend to + * occur with an unanchored start loop. 
*/ + if (pi->u.path.bits == (uint32_t)0 && USE_COLLAPSED_ZERO_PREFIX + && (pi->u.path.offset == 0 || pi->u.path.backlink == COLLAPSED_ZERO_PREFIX_ID)) { + release_path_info_link(vm, &pi_id); + pi_id = COLLAPSED_ZERO_PREFIX_ID; + +#if CAPVM_STATS + vm->stats.collapsed_zero_prefixes++; +#endif + } else { + /* Check if there's an existing full path node with + * exactly the same bits. If so, link backward to that + * and free the old full one, rather than saving it as + * a duplicate. */ + const uint32_t old_path_bits = pi->u.path.bits; + const uint32_t old_path_offset = pi->u.path.offset; + const uint32_t old_path_backlink = pi->u.path.backlink; + + for (uint32_t epi_id = 0; epi_id < vm->paths.ceil; epi_id++) { + if (epi_id == pi_id) { continue; } + struct capvm_path_info *epi = &vm->paths.pool[epi_id]; + if (IS_PATH_FREELIST(epi)) { + continue; + } + + assert(IS_PATH_NODE(epi)); + if (epi->u.path.used == PATH_LINK_BITS + && epi->u.path.bits == old_path_bits + && epi->u.path.offset == old_path_offset + && epi->u.path.backlink == old_path_backlink) { + + if (LOG_CAPVM >= 4 || 1) { + const uint32_t refcount = get_path_node_refcount(vm, epi_id); + (void)refcount; + LOG(4 - LOG_EPI, "%s: pi[%u] refcount %u -> %u (reusing identical path backlink %u instead of %u)\n", + __func__, epi_id, refcount, refcount + 1, + epi_id, pi_id); + } + inc_path_node_refcount(vm, epi_id); + release_path_info_link(vm, &pi_id); + pi_id = epi_id; +#if CAPVM_STATS + vm->stats.path_prefixes_shared++; +#endif + break; + } + } + } + + assert(IS_PATH_NODE(npi)); + npi->u.path.backlink = pi_id; + /* transfer pi_id's reference to npi_id */ + *new_path_info_head = npi_id; + +#if CAPVM_PATH_STATS + update_max_path_length_memory(vm, npi); +#endif + + return true; + } else { + assert(IS_PATH_NODE(pi)); + assert(pi->u.path.used < PATH_LINK_BITS); + + LOG(5 - LOG_EPI, "%s: appending to head node %u, %u -> %u used\n", + __func__, pi_id, pi->u.path.used, pi->u.path.used + 1); + assert(pi->u.path.used < 
PATH_LINK_BITS); + if (greedy) { + LOG(5 - LOG_EPI, "%s: bits before: 0x%08x (greedy: %d)\n", + __func__, pi->u.path.bits, greedy); + pi->u.path.bits |= (uint32_t)1 << (31 - pi->u.path.used); + LOG(5 - LOG_EPI, "%s: bits after: 0x%08x\n", + __func__, pi->u.path.bits); + } + pi->u.path.used++; +#if CAPVM_PATH_STATS + pi->u.path.bits_added_per_input_character++; +#endif + +#if CAPVM_PATH_STATS + update_max_path_length_memory(vm, pi); +#endif + + *new_path_info_head = pi_id; + return true; + } +#undef LOG_EPI +} + +static void +populate_solution(struct capvm *vm) +{ + if (LOG_CAPVM >= 3) { + fsm_capvm_program_dump(stderr, vm->p); + LOG(0, "%s: best_path_id %d, tables:\n", __func__, vm->solution.best_path_id); + dump_path_table(stderr, vm); + check_path_table(vm); + fprintf(stderr, "SOLUTION_PATH: "); + print_path(stderr, vm, vm->solution.best_path_id); + fprintf(stderr, "\n"); + } + +#if CAPVM_PATH_STATS + LOG(1, "%s: prog_size %u, max_path_length_memory %u (bits)\n", + __func__, vm->p->used, vm->stats.max_path_length_memory); + const uint32_t uniq_id = vm->solution.best_path_uniq_id; +#else + const uint32_t uniq_id = NO_ID; +#endif + (void)uniq_id; + + /* Interpret the program again, but rather than using the input to + * drive execution, use the saved path for the primary solution. */ + + /* Walk the solution path, reversing the edges temporarily so it + * can be executed start to finish, and truncate any bits appended + * after branches on the path. 
*/ + assert(vm->solution.best_path_id != NO_ID); + assert(vm->solution.best_path_id < vm->paths.ceil); + + uint32_t path_link = vm->solution.best_path_id; + uint32_t next_link = NO_ID; + uint32_t first_link = NO_ID; + + size_t split_count = 0; + uint32_t zero_prefix_length = 0; + + if (LOG_CAPVM >= 3) { + const struct capvm_path_info *pi = &vm->paths.pool[path_link]; + assert(!IS_PATH_FREELIST(pi)); + LOG(3, "%s: best_path %d, path_length %u\n", + __func__, vm->solution.best_path_id, pi->u.path.offset + pi->u.path.used); + if (LOG_CAPVM > 4) { + dump_path_table(stderr, vm); + } + } + + uint32_t prev; + do { + struct capvm_path_info *pi = &vm->paths.pool[path_link]; + assert(!IS_PATH_FREELIST(pi)); + const uint32_t prev_link = get_path_node_backlink(vm, path_link); + + if (LOG_CAPVM >= 3) { + if (IS_PATH_NODE(pi)) { + LOG(3, "%s (moving back), node %u: refcount %u, used %u, offset %u, backlink %d, bits '", + __func__, path_link, pi->u.path.refcount, pi->u.path.used, + pi->u.path.offset, pi->u.path.backlink); + for (uint8_t i = 0; i < pi->u.path.used; i++) { + const uint32_t bit = (pi->u.path.bits & ((uint32_t)1 << (31 - i))); + LOG(3, "%c", bit ? 
'1' : '0'); + } + LOG(3, "'\n"); + } + } + + split_count += pi->u.path.used; + + if (next_link != NO_ID) { + LOG(3, "-- setting backlink to %d\n", next_link); + set_path_node_backlink(vm, path_link, next_link); /* point fwd */ + } else { + LOG(3, "-- setting backlink to %d\n", NO_ID); + set_path_node_backlink(vm, path_link, NO_ID); /* now EOL */ + } + + if (prev_link == NO_ID) { + first_link = path_link; + prev = prev_link; + } else if (prev_link == COLLAPSED_ZERO_PREFIX_ID) { + first_link = path_link; + split_count += pi->u.path.offset; + zero_prefix_length = pi->u.path.offset; + prev = prev_link; + } + + next_link = path_link; + assert(path_link != prev_link); + path_link = prev_link; + } while (path_link != NO_ID && path_link != COLLAPSED_ZERO_PREFIX_ID); + + /* iter forward */ + if (LOG_CAPVM >= 3) { + uint32_t cur = first_link; + do { + struct capvm_path_info *pi = &vm->paths.pool[cur]; + + assert(IS_PATH_NODE(pi)); + LOG(3, "%s (moving fwd): node %u: refcount %u, used %u, offset %u, fwdlink %d, bits '", + __func__, cur, get_path_node_refcount(vm, cur), + pi->u.path.used, + get_path_node_offset(vm, cur), + get_path_node_backlink(vm, cur)); + for (uint8_t i = 0; i < pi->u.path.used; i++) { + const uint32_t bit = (pi->u.path.bits & ((uint32_t)1 << (31 - i))); + LOG(3, "%c", bit ? '1' : '0'); + } + LOG(3, "'\n"); + + const uint32_t next_cur = get_path_node_backlink(vm, cur); + assert(cur != next_cur); + cur = next_cur; /* fwd link */ + } while (cur != NO_ID); + } + + /* evaluate program with forward path */ + LOG(3, "%s: split_count %zu\n", __func__, split_count); + size_t split_i = 0; + uint32_t prog_i = 0; + uint32_t input_i = 0; + size_t capture_lookup_steps = 0; + bool done = false; + + /* This flag tracks whether an explicit newline was matched at + * the end of input. Normally a trailing newline is implicitly + * ignored in the bounds for captures, but when the regex + * matches a newline at the end, it must still be included. 
An + * example case where this matters is `^[^x]$` for "\n", because + * the character class matches the newline this should capture + * as (0,1). */ + bool explicitly_matched_nl_at_end = false; + + uint32_t cur = first_link; + while (split_i < split_count || !done) { + assert(prog_i < vm->p->used); + const uint32_t cur_prog_i = prog_i; + const struct capvm_opcode *op = &vm->p->ops[cur_prog_i]; + LOG(3, "%s: i_i %u, p_i %u, s_i %zu/%zu, op %s\n", + __func__, input_i, cur_prog_i, split_i, split_count, op_name[op->t]); + + prog_i++; + capture_lookup_steps++; + switch (op->t) { + case CAPVM_OP_CHAR: + assert(input_i < vm->input_len); + LOG(3, "OP_CHAR: input_i %u, exp char '%c', got '%c'\n", + input_i, op->u.chr, vm->input[input_i]); + assert(vm->input[input_i] == op->u.chr); + if (vm->input_len > 0 + && input_i == vm->input_len - 1 + && vm->input[input_i] == '\n') { + explicitly_matched_nl_at_end = true; + } + input_i++; + break; + case CAPVM_OP_CHARCLASS: + assert(input_i < vm->input_len); + if (vm->input_len > 0 + && input_i == vm->input_len - 1 + && vm->input[input_i] == '\n') { + explicitly_matched_nl_at_end = true; + } + input_i++; + break; + case CAPVM_OP_MATCH: + LOG(2, "split_i %zu, split_count %zu\n", split_i, split_count); + assert(split_i == split_count); + done = true; + break; + case CAPVM_OP_JMP: + prog_i = op->u.jmp; + break; + case CAPVM_OP_JMP_ONCE: + { + /* look at next bit of path and jmp or fall through */ + const uint32_t offset = get_path_node_offset(vm, cur); + const struct capvm_path_info *pi = &vm->paths.pool[cur]; + + assert(IS_PATH_NODE(pi)); + bool next_bit; + LOG(3, "%s: OP_JMP_ONCE: split_i %zu, zpl %u, offset %u, pi->u.path.used %u\n", + __func__, split_i, zero_prefix_length, offset, pi->u.path.used); + if (split_i < zero_prefix_length) { + next_bit = 0; + } else { + assert(split_i >= offset && + split_i <= offset + pi->u.path.used); + const uint32_t shift = 31 - (split_i & 31); + assert(shift < PATH_LINK_BITS); + next_bit = 
(pi->u.path.bits & ((uint32_t)1 << shift)) != 0; + } + LOG(3, "jmp_once: next_bit %d\n", next_bit); + LOG_EXEC_PATH_SAVE_CAPTURES(uniq_id, next_bit); + if (next_bit) { /* greedy edge */ + prog_i = op->u.jmp_once; + } else { /* non-greedy edge */ + /* fall through */ + } + split_i++; + if (split_i >= offset && + split_i - offset == pi->u.path.used && split_i < split_count) { + const uint32_t backlink = get_path_node_backlink(vm, cur); + assert(backlink != NO_ID); + cur = backlink; + } + LOG(3, "%s: prog_i now %u, split_i %zu/%zu\n", + __func__, prog_i, split_i, split_count); + assert(split_i <= split_count); + break; + } + case CAPVM_OP_SPLIT: + { + /* look at next bit of path and act accordingly */ + const uint32_t offset = get_path_node_offset(vm, cur); + const struct capvm_path_info *pi = &vm->paths.pool[cur]; + + const uint32_t dst_greedy = op->u.split.greedy; + const uint32_t dst_nongreedy = op->u.split.nongreedy; + + assert(IS_PATH_NODE(pi)); + bool next_bit; + LOG(3, "%s: OP_SPLIT_CONT: split_i %zu, zpl %u, offset %u, pi->u.path.used %u\n", + __func__, split_i, zero_prefix_length, offset, pi->u.path.used); + if (split_i < zero_prefix_length) { + next_bit = 0; + } else { + assert(split_i >= offset && + split_i <= offset + pi->u.path.used); + const uint32_t shift = 31 - (split_i & 31); + assert(shift < PATH_LINK_BITS); + next_bit = (pi->u.path.bits & ((uint32_t)1 << shift)) != 0; + } + LOG(3, "split: next_bit %d\n", next_bit); + LOG_EXEC_PATH_SAVE_CAPTURES(uniq_id, next_bit); + if (next_bit) { /* greedy edge */ + prog_i = dst_greedy; + } else { /* non-greedy edge */ + prog_i = dst_nongreedy; + } + split_i++; + if (split_i >= offset && + split_i - offset == pi->u.path.used && split_i < split_count) { + const uint32_t backlink = get_path_node_backlink(vm, cur); + assert(backlink != NO_ID); + cur = backlink; + } + LOG(3, "%s: prog_i now %u, split_i %zu/%zu\n", + __func__, prog_i, split_i, split_count); + assert(split_i <= split_count); + + break; + } + case 
CAPVM_OP_SAVE: + { + const unsigned capture_id = op->u.save/2; + const bool is_end = (op->u.save & 1) == 1; + + LOG(5, "%s: input_i %u, save %d -> capture %d pos %d, cur value %zd, prev char 0x%02x\n", + __func__, + input_i, op->u.save, + capture_id, is_end, + vm->capture_buf[op->u.save/2].pos[op->u.save & 1], + input_i > 0 ? vm->input[input_i - 1] : 0xff); + + /* Special case to ignore a trailing + * newline when capturing, unless the + * newline was explicitly matched as the + * last character of input. */ + if (input_i > 0 + && !explicitly_matched_nl_at_end + && input_i == vm->input_len + && vm->input[input_i - 1] == '\n') { + LOG(3, "%s: updating capture[%u].pos[1] to ignore trailing '\\n' at %u\n", + __func__, capture_id, input_i); + vm->capture_buf[capture_id].pos[is_end] = input_i - 1; + } else { + /* Save current position to appropriate capture buffer endpoint */ + vm->capture_buf[op->u.save/2].pos[op->u.save & 1] = input_i; + LOG(3, "%s: saved capture[%d].pos[%d] <- %u\n", + __func__, op->u.save/2, op->u.save&1, input_i); + } + break; + } + case CAPVM_OP_ANCHOR: + if (op->u.anchor == CAPVM_ANCHOR_START) { + assert(input_i == 0 + || (input_i == 1 + && vm->input_len == 1 + && vm->input[0] == '\n')); + } else { + assert(op->u.anchor == CAPVM_ANCHOR_END); + LOG(3, "%s: $ anchor: input_len %u, input_i %u\n", + __func__, vm->input_len, input_i); + + if (vm->input_len > 0 && input_i == vm->input_len - 1) { + /* special hack to not include trailing newline + * in match group zero */ + if (vm->p->capture_count > 0) { + vm->capture_buf[0].pos[1] = input_i; + } + + assert(vm->input[input_i] == '\n'); + input_i++; + } else { + assert(input_i == vm->input_len); + } + } + break; + + default: + assert(!"match fail"); + } + } + + /* write solution into caller's buffers and print */ + if (LOG_SOLUTION_TO_STDOUT) { + /* fprintf(stderr, "capture_count %u\n", vm->p->capture_count); */ + printf("HIT:"); + for (unsigned i = 0; i < vm->p->capture_count; i++) { + printf(" %zd 
%zd", + vm->capture_buf[i].pos[0], vm->capture_buf[i].pos[1]); + } + printf("\n"); + } + + /* restore original link order */ + cur = first_link; + do { + struct capvm_path_info *pi = &vm->paths.pool[cur]; + assert(!IS_PATH_FREELIST(pi)); + const uint32_t backlink = get_path_node_backlink(vm, cur); + + LOG(3, "%s (moving fwd again): node %u: refcount %u, used %u, offset %u, fwdlink %d, bits '", + __func__, cur, get_path_node_refcount(vm, cur), + pi->u.path.used, + get_path_node_offset(vm, cur), + backlink); + for (uint8_t i = 0; i < pi->u.path.used; i++) { + const uint32_t bit = (pi->u.path.bits & ((uint32_t)1 << (31 - i))); + LOG(3, "%c", (pi->u.path.bits & bit) ? '1' : '0'); + (void)bit; + } + LOG(3, "'\n"); + + LOG(3, "-- setting node %u's backlink to %d\n", cur, prev); + const uint32_t next = backlink; + set_path_node_backlink(vm, cur, prev); + + prev = cur; + cur = next; /* fwd link */ + } while (cur != NO_ID); +} + +/* TODO: It should be possible to avoid dynamic allocation here + * by calculating the max space needed upfront and passing in a + * uint32_t or uint64_t-aligned array for working space. */ + +enum fsm_capvm_program_exec_res +fsm_capvm_program_exec(const struct capvm_program *program, + const uint8_t *input, size_t length, + struct fsm_capture *capture_buf, size_t capture_buf_length) +{ + assert(program != NULL); + assert(input != NULL || length == 0); + assert(capture_buf != NULL); + + const size_t thread_max = program->used; + + /* FIXME: The path node table can grow beyond this, but in + * practice will usually stay fairly small. The worst case + * should be decidable based on the compiled program and input + * length. 
*/ +#if ALLOW_PATH_TABLE_RESIZING + const size_t path_info_max = thread_max; +#else + const size_t path_info_max = 3 * thread_max; +#endif + + struct capvm_path_info *path_info_pool = malloc(path_info_max + * sizeof(path_info_pool[0])); + if (path_info_pool == NULL) { + return FSM_CAPVM_PROGRAM_EXEC_ERROR_ALLOC; + } + assert(path_info_pool != NULL); + + /* link path_info freelist */ + for (size_t i = 1; i < path_info_max - 1; i++) { + struct capvm_path_info *pi = &path_info_pool[i]; + pi->u.freelist_node.refcount = 0; + pi->u.freelist_node.freelist = i + 1; + } + struct capvm_path_info *piZ = &path_info_pool[path_info_max - 1]; + piZ->u.freelist_node.refcount = 0; + piZ->u.freelist_node.freelist = NO_ID; + + /* init an empty path descriptor for initial execution */ + struct capvm_path_info *pi0 = &path_info_pool[0]; + pi0->u.path.refcount = 1; + pi0->u.path.used = 0; + pi0->u.path.bits = 0; + pi0->u.path.offset = 0; + pi0->u.path.backlink = NO_ID; + + uint32_t stack_a[thread_max]; + uint32_t stack_b[thread_max]; + + const size_t evaluated_bit_words = program->used/32 + 1; + uint32_t evaluated_a[evaluated_bit_words]; + uint32_t evaluated_b[evaluated_bit_words]; + uint32_t path_info_head_a[thread_max]; + uint32_t path_info_head_b[thread_max]; +#if CAPVM_STATS + uint32_t uniq_ids_a[thread_max]; + uint32_t uniq_ids_b[thread_max]; +#endif + + assert(capture_buf_length >= program->capture_base + program->capture_count); + + struct fsm_capture *offset_capture_buf = &capture_buf[program->capture_base]; + + struct capvm vm = { + .res = FSM_CAPVM_PROGRAM_EXEC_NO_SOLUTION_FOUND, + .p = program, + .input = input, + .input_len = length, + .capture_buf = offset_capture_buf, + .capture_buf_length = capture_buf_length, + .step_limit = SIZE_MAX, +#if CAPVM_STATS + .uniq_id_counter = 0, +#endif + + .run_stacks = { stack_a, stack_b }, + .evaluated = { evaluated_a, evaluated_b }, + .path_info_heads = { path_info_head_a, path_info_head_b }, +#if CAPVM_STATS + .uniq_ids = { uniq_ids_a, 
uniq_ids_b }, +#endif + + .paths = { + .ceil = path_info_max, + .live = 1, + .max_live = 1, + .freelist_head = 1, + .pool = path_info_pool, + }, + .solution = { + .best_path_id = NO_ID, + }, + }; + + /* enqueue execution at first opcode */ + vm.run_stacks[PAIR_ID_NEXT][0] = 0; + vm.run_stacks_h[PAIR_ID_NEXT] = 1; + vm.threads.live = 1; + vm.threads.max_live = 1; + vm.path_info_heads[PAIR_ID_NEXT][0] = 0; + +#if CAPVM_STATS + vm.uniq_ids[PAIR_ID_NEXT][0] = 0; +#endif + + INIT_TIMERS(); + TIME(&pre); + if (eval_vm(&vm)) { + assert(vm.threads.live == 0); + assert(vm.paths.live > 0); + + populate_solution(&vm); + release_path_info_link(&vm, &vm.solution.best_path_id); + vm.res = FSM_CAPVM_PROGRAM_EXEC_SOLUTION_WRITTEN; + + /* TODO: This assert will not work if refcounts are + * sticky at the max value, but if the number of paths + * and threads is bounded then it shouldn't be possible + * to overflow the refcount anyway. If sticky refcounts + * are used then reaching one should probably set a + * flag, which would skip this assertion. 
*/ + assert(vm.paths.live == 0); + } else { + assert(vm.res != FSM_CAPVM_PROGRAM_EXEC_SOLUTION_WRITTEN); + } + + TIME(&post); + DIFF_MSEC(__func__, pre, post, NULL); + +#if CAPVM_STATS + LOG(2, "%s: %zu steps, max_threads %u, max_paths %u, matches %u, path_prefixes_shared %u, collapsed_zero_prefixes %u\n", + __func__, vm.stats.steps, vm.threads.max_live, vm.paths.max_live, vm.stats.matches, + vm.stats.path_prefixes_shared, vm.stats.collapsed_zero_prefixes); +#if CAPVM_PATH_STATS + LOG(2, "%s: prog_size %u, max_path_length_memory %u (bits), input length %zu, max_paths * %zu bytes/path => %zu bytes\n", + __func__, vm.p->used, vm.stats.max_path_length_memory, length, + sizeof(vm.paths.pool[0]), + vm.paths.max_live * sizeof(vm.paths.pool[0])); +#endif +#endif + + free(vm.paths.pool); + return vm.res; +} diff --git a/src/libfsm/capture_vm_log.h b/src/libfsm/capture_vm_log.h new file mode 100644 index 000000000..8ff51d8b4 --- /dev/null +++ b/src/libfsm/capture_vm_log.h @@ -0,0 +1,21 @@ +/* + * Copyright 2022 Scott Vokes + * + * See LICENCE for the full copyright terms. + */ + +#ifndef CAPTURE_VM_LOG_H +#define CAPTURE_VM_LOG_H + +#include + +#define LOG_CAPVM (1+0) +#define LOG(LEVEL, ...) \ + do { \ + if ((LEVEL) <= LOG_CAPVM) { \ + fprintf(stderr, __VA_ARGS__); \ + } \ + } while(0) + + +#endif diff --git a/src/libfsm/capture_vm_program.h b/src/libfsm/capture_vm_program.h new file mode 100644 index 000000000..0b24ffb5b --- /dev/null +++ b/src/libfsm/capture_vm_program.h @@ -0,0 +1,74 @@ +/* + * Copyright 2022 Scott Vokes + * + * See LICENCE for the full copyright terms. 
+ */ + +#ifndef CAPTURE_VM_PROGRAM_H +#define CAPTURE_VM_PROGRAM_H + +#include + +struct capvm_program { + uint32_t capture_count; + uint32_t capture_base; + + uint32_t used; + uint32_t ceil; + struct capvm_opcode { + enum capvm_opcode_type { + /* Next character of input == .u.chr */ + CAPVM_OP_CHAR, + /* Next character of input is in char class */ + CAPVM_OP_CHARCLASS, + /* Input has matched */ + CAPVM_OP_MATCH, + /* Unconditional jump */ + CAPVM_OP_JMP, + /* If destination has already been evaluated + * since advancing the input position, fall + * through to next instruction, otherwise jmp. */ + CAPVM_OP_JMP_ONCE, + /* Split execution to two paths, where .u.split.greedy + * is the greedier destination of the two. */ + CAPVM_OP_SPLIT, + /* Save current input position as capture bound */ + CAPVM_OP_SAVE, + /* Check if current input position is at start/end + * of input, after accounting for PCRE's special + * cases for a trailing newline. */ + CAPVM_OP_ANCHOR, + } t; + union { + uint8_t chr; + uint32_t charclass_id; + uint32_t jmp; /* absolute */ + uint32_t jmp_once; /* absolute */ + struct { + uint32_t greedy; + uint32_t nongreedy; + } split; + /* (save >> 1): capture ID, + * (save & 0x01): save pos to start (0b0) or end (0b1). */ + uint32_t save; + enum capvm_anchor_type { + CAPVM_ANCHOR_START, + CAPVM_ANCHOR_END, + } anchor; + } u; + } *ops; + + /* Most compiled programs only use a few distinct character + * classes (if any), and the data is much larger than the + * other instructions, so they are stored in a separate + * table and referred to by op->u.charclass_id. 
*/ + struct capvm_char_classes { + uint32_t count; + uint32_t ceil; + struct capvm_char_class { + uint64_t octets[4]; /* 256-bitset */ + } *sets; + } char_classes; +}; + +#endif diff --git a/src/libfsm/clone.c b/src/libfsm/clone.c index bec16bb0f..8b7b606c8 100644 --- a/src/libfsm/clone.c +++ b/src/libfsm/clone.c @@ -21,10 +21,7 @@ #define LOG_CLONE_ENDIDS 0 static int -copy_capture_actions(struct fsm *dst, const struct fsm *src); - -static int -copy_end_ids(struct fsm *dst, const struct fsm *src); +copy_end_metadata(struct fsm *dst, const struct fsm *src); struct fsm * fsm_clone(const struct fsm *fsm) @@ -70,12 +67,7 @@ fsm_clone(const struct fsm *fsm) } { - if (!copy_capture_actions(new, fsm)) { - fsm_free(new); - return NULL; - } - - if (!copy_end_ids(new, fsm)) { + if (!copy_end_metadata(new, fsm)) { fsm_free(new); return NULL; } @@ -84,75 +76,81 @@ fsm_clone(const struct fsm *fsm) return new; } -struct copy_capture_actions_env { +struct copy_end_ids_env { + char tag; struct fsm *dst; int ok; }; static int -copy_capture_actions_cb(fsm_state_t state, - enum capture_action_type type, unsigned capture_id, fsm_state_t to, - void *opaque) +copy_end_ids_cb(const struct fsm *fsm, fsm_state_t state, + size_t nth, const fsm_end_id_t id, void *opaque) { - struct copy_capture_actions_env *env = opaque; - assert(env->dst); + struct copy_end_ids_env *env = opaque; + enum fsm_endid_set_res sres; + assert(env->tag == 'c'); + (void)fsm; + (void)nth; - if (!fsm_capture_add_action(env->dst, - state, type, capture_id, to)) { +#if LOG_CLONE_ENDIDS + fprintf(stderr, "clone[%d] <- %d\n", state, id); +#endif + + sres = fsm_endid_set(env->dst, state, id); + if (sres == FSM_ENDID_SET_ERROR_ALLOC_FAIL) { env->ok = 0; + return 0; } - return env->ok; + return 1; } static int -copy_capture_actions(struct fsm *dst, const struct fsm *src) +copy_active_capture_ids_cb(fsm_state_t state, unsigned capture_id, void *opaque) { - struct copy_capture_actions_env env = { NULL, 1 }; - env.dst = dst; + 
struct copy_end_ids_env *env = opaque; - fsm_capture_action_iter(src, - copy_capture_actions_cb, &env); - return env.ok; + if (!fsm_capture_set_active_for_end(env->dst, + capture_id, + state)) { + env->ok = 0; + return 0; + } + return 1; } -struct copy_end_ids_env { - char tag; - struct fsm *dst; - const struct fsm *src; - int ok; -}; - static int -copy_end_ids_cb(fsm_state_t state, const fsm_end_id_t id, void *opaque) +associate_capture_programs_cb(fsm_state_t state, unsigned prog_id, void *opaque) { struct copy_end_ids_env *env = opaque; - enum fsm_endid_set_res sres; - assert(env->tag == 'c'); -#if LOG_CLONE_ENDIDS - fprintf(stderr, "clone[%d] <- %d\n", state, id); -#endif - - sres = fsm_endid_set(env->dst, state, id); - if (sres == FSM_ENDID_SET_ERROR_ALLOC_FAIL) { + if (!fsm_capture_associate_program_with_end_state(env->dst, + prog_id, state)) { env->ok = 0; return 0; } - return 1; } static int -copy_end_ids(struct fsm *dst, const struct fsm *src) +copy_end_metadata(struct fsm *dst, const struct fsm *src) { struct copy_end_ids_env env; env.tag = 'c'; /* for clone */ env.dst = dst; - env.src = src; env.ok = 1; fsm_endid_iter(src, copy_end_ids_cb, &env); + fsm_capture_iter_active_for_all_end_states(src, + copy_active_capture_ids_cb, &env); + + if (!fsm_capture_copy_programs(src, dst)) { + return 0; + } + + fsm_capture_iter_program_ids_for_all_end_states(src, + associate_capture_programs_cb, &env); + return env.ok; } diff --git a/src/libfsm/closure.c b/src/libfsm/closure.c index fa2d0c783..9ebf48eb9 100644 --- a/src/libfsm/closure.c +++ b/src/libfsm/closure.c @@ -128,7 +128,7 @@ epsilon_closure_single(const struct fsm *fsm, struct state_set **closures, fsm_s } struct state_set ** -epsilon_closure(struct fsm *fsm) +fsm_epsilon_closure(struct fsm *fsm) { struct state_set **closures; fsm_state_t s; @@ -190,7 +190,7 @@ epsilon_closure(struct fsm *fsm) } void -closure_free(struct state_set **closures, size_t n) +fsm_closure_free(struct state_set **closures, size_t n) 
{ fsm_state_t s; diff --git a/src/libfsm/consolidate.c b/src/libfsm/consolidate.c index 4518d3926..a5fb98878 100644 --- a/src/libfsm/consolidate.c +++ b/src/libfsm/consolidate.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "internal.h" #include "capture.h" @@ -26,27 +27,15 @@ #define LOG_MAPPING 0 #define LOG_CONSOLIDATE_CAPTURES 0 -#define LOG_CONSOLIDATE_ENDIDS 0 +#define LOG_CONSOLIDATE_END_METADATA 0 struct mapping_closure { size_t count; const fsm_state_t *mapping; }; -struct consolidate_copy_capture_actions_env { - char tag; - struct fsm *dst; - size_t mapping_count; - const fsm_state_t *mapping; - int ok; -}; - -static int -consolidate_copy_capture_actions(struct fsm *dst, const struct fsm *src, - const fsm_state_t *mapping, size_t mapping_count); - static int -consolidate_end_ids(struct fsm *dst, const struct fsm *src, +consolidate_end_metadata(struct fsm *dst, const struct fsm *src, const fsm_state_t *mapping, size_t mapping_count); static fsm_state_t @@ -67,7 +56,16 @@ fsm_consolidate(const struct fsm *src, struct mapping_closure closure; size_t max_used = 0; +#if LOG_CONSOLIDATE_END_METADATA > 1 + fprintf(stderr, "==== fsm_consolidate -- endid_info before:\n"); + fsm_endid_dump(stderr, src); + fsm_capture_dump_active_for_ends(stderr, src); +#endif + assert(src != NULL); + if (mapping_count == 0) { + return fsm_clone(src); + } assert(src->opt != NULL); dst = fsm_new(src->opt); @@ -76,12 +74,14 @@ fsm_consolidate(const struct fsm *src, } for (src_i = 0; src_i < mapping_count; src_i++) { + const fsm_state_t dst_i = mapping[src_i]; #if LOG_MAPPING fprintf(stderr, "consolidate_mapping[%u]: %u\n", src_i, mapping[src_i]); #endif - if (mapping[src_i] >= max_used) { - max_used = mapping[src_i]; + if (dst_i > max_used) { + assert(dst_i != FSM_STATE_REMAP_NO_STATE); + max_used = dst_i; } } @@ -96,8 +96,8 @@ fsm_consolidate(const struct fsm *src, goto cleanup; } -#define DST_SEEN(I) (seen[I/64] & ((uint64_t)1 << (I&63))) -#define SET_DST_SEEN(I) 
(seen[I/64] |= ((uint64_t)1 << (I&63))) +#define DST_SEEN(I) u64bitset_get(seen, I) +#define SET_DST_SEEN(I) u64bitset_set(seen, I) /* map N states to M states, where N >= M. * if it's the first time state[M] is seen, @@ -110,6 +110,9 @@ fsm_consolidate(const struct fsm *src, for (src_i = 0; src_i < mapping_count; src_i++) { const fsm_state_t dst_i = mapping[src_i]; + /* fsm_consolidate does not currently support discarding states. */ + assert(dst_i != FSM_STATE_REMAP_NO_STATE); + if (!DST_SEEN(dst_i)) { SET_DST_SEEN(dst_i); @@ -134,11 +137,11 @@ fsm_consolidate(const struct fsm *src, } } - if (!consolidate_copy_capture_actions(dst, src, mapping, mapping_count)) { + if (!fsm_capture_copy_programs(src, dst)) { goto cleanup; } - if (!consolidate_end_ids(dst, src, mapping, mapping_count)) { + if (!consolidate_end_metadata(dst, src, mapping, mapping_count)) { goto cleanup; } @@ -161,97 +164,89 @@ fsm_consolidate(const struct fsm *src, return NULL; } +struct consolidate_end_ids_env { + char tag; + struct fsm *dst; + const struct fsm *src; + const fsm_state_t *mapping; + size_t mapping_count; + int ok; +}; + static int -consolidate_copy_capture_actions_cb(fsm_state_t state, - enum capture_action_type type, unsigned capture_id, fsm_state_t to, +consolidate_active_captures_cb(fsm_state_t state, unsigned capture_id, void *opaque) { - struct consolidate_copy_capture_actions_env *env = opaque; - fsm_state_t s, t; - + struct consolidate_end_ids_env *env = opaque; + fsm_state_t dst_s; assert(env->tag == 'C'); -#if LOG_CONSOLIDATE_CAPTURES - fprintf(stderr, "consolidate_copy_capture_actions_cb: state %u, type %s, ID %u, TO %d\n", - state, - fsm_capture_action_type_name[type], - capture_id, to); -#endif - assert(state < env->mapping_count); - assert(to == CAPTURE_NO_STATE || to < env->mapping_count); - s = env->mapping[state]; - t = to == CAPTURE_NO_STATE - ? 
CAPTURE_NO_STATE : env->mapping[to]; + dst_s = env->mapping[state]; - if (!fsm_capture_add_action(env->dst, - s, type, capture_id, t)) { +#if LOG_CONSOLIDATE_END_METADATA + fprintf(stderr, "consolidate_active_captures_cb: state %d -> dst_s %d, capture_id %u\n", + state, dst_s, capture_id); +#endif + + if (!fsm_capture_set_active_for_end(env->dst, capture_id, dst_s)) { env->ok = 0; return 0; } - return 1; } static int -consolidate_copy_capture_actions(struct fsm *dst, const struct fsm *src, - const fsm_state_t *mapping, size_t mapping_count) +consolidate_capture_programs_cb(fsm_state_t state, unsigned program_id, + void *opaque) { - size_t i; + struct consolidate_end_ids_env *env = opaque; + fsm_state_t dst_s; + assert(env->tag == 'C'); - struct consolidate_copy_capture_actions_env env; - env.tag = 'C'; - env.dst = dst; - env.mapping_count = mapping_count; - env.mapping = mapping; - env.ok = 1; + assert(state < env->mapping_count); + dst_s = env->mapping[state]; -#if LOG_MAPPING - for (i = 0; i < mapping_count; i++) { - fprintf(stderr, "mapping[%lu]: %u\n", i, mapping[i]); + if (!fsm_capture_associate_program_with_end_state(env->dst, + (uint32_t)program_id, dst_s)) { + env->ok = 0; } -#else - (void)i; -#endif - fsm_capture_action_iter(src, - consolidate_copy_capture_actions_cb, &env); - return env.ok; + return 1; } -struct consolidate_end_ids_env { - char tag; - struct fsm *dst; - const struct fsm *src; - const fsm_state_t *mapping; - size_t mapping_count; -}; - static int consolidate_end_ids_cb(fsm_state_t state, const fsm_end_id_t *ids, size_t num_ids, void *opaque) { struct consolidate_end_ids_env *env = opaque; - enum fsm_endid_set_res sres; - fsm_state_t s; + fsm_state_t dst_s; assert(env->tag == 'C'); assert(state < env->mapping_count); - s = env->mapping[state]; + dst_s = env->mapping[state]; - sres = fsm_endid_set_bulk(env->dst, s, num_ids, ids, FSM_ENDID_BULK_APPEND); +#if LOG_CONSOLIDATE_END_METADATA > 1 + fprintf(stderr, "consolidate_end_ids_cb: state %u, 
dst %u, IDs [", + state, dst_s); + for (size_t i = 0; i < num_ids; i++) { + fprintf(stderr, "%s%d", i > 0 ? " " : "", ids[i]); + } + fprintf(stderr, "]\n"); +#endif + + enum fsm_endid_set_res sres = fsm_endid_set_bulk(env->dst, + dst_s, num_ids, ids, FSM_ENDID_BULK_APPEND); if (sres == FSM_ENDID_SET_ERROR_ALLOC_FAIL) { return 0; } - return 1; } static int -consolidate_end_ids(struct fsm *dst, const struct fsm *src, +consolidate_end_metadata(struct fsm *dst, const struct fsm *src, const fsm_state_t *mapping, size_t mapping_count) { struct consolidate_end_ids_env env; - int ret; env.tag = 'C'; /* for Consolidate */ env.dst = dst; @@ -259,12 +254,31 @@ consolidate_end_ids(struct fsm *dst, const struct fsm *src, env.mapping = mapping; env.mapping_count = mapping_count; - ret = fsm_endid_iter_bulk(src, consolidate_end_ids_cb, &env); + env.ok = fsm_endid_iter_bulk(src, consolidate_end_ids_cb, &env); -#if LOG_CONSOLIDATE_ENDIDS > 1 + if (env.ok) { + fsm_state_t s; + const size_t src_state_count = fsm_countstates(src); + for (s = 0; s < src_state_count; s++) { + fsm_capture_iter_active_for_end_state(src, s, + consolidate_active_captures_cb, &env); + if (!env.ok) { + break; + } + + fsm_capture_iter_program_ids_for_end_state(src, s, + consolidate_capture_programs_cb, &env); + if (!env.ok) { + break; + } + } + } + +#if LOG_CONSOLIDATE_END_METADATA > 1 fprintf(stderr, "==== fsm_consolidate -- endid_info after:\n"); fsm_endid_dump(stderr, dst); + fsm_capture_dump_active_for_ends(stderr, dst); #endif - return ret; + return env.ok; } diff --git a/src/libfsm/determinise.c b/src/libfsm/determinise.c index 56e135afd..7aa3fdb66 100644 --- a/src/libfsm/determinise.c +++ b/src/libfsm/determinise.c @@ -6,6 +6,8 @@ #include "determinise_internal.h" +#define LOG_DETERMINISATION_COUNTERS 0 + static void dump_labels(FILE *f, const uint64_t labels[4]) { @@ -29,6 +31,8 @@ fsm_determinise(struct fsm *nfa) size_t dfacount = 0; struct analyze_closures_env ac_env = { 0 }; + INIT_TIMERS(); + 
INIT_TIMERS_NAMED(overall); assert(nfa != NULL); map.alloc = nfa->opt->alloc; @@ -39,9 +43,12 @@ fsm_determinise(struct fsm *nfa) * faster where we can start with an epsilon-free NFA in the first place. */ if (fsm_has(nfa, fsm_hasepsilons)) { + TIME(&pre); if (!fsm_remove_epsilons(nfa)) { return 0; } + TIME(&post); + DIFF_MSEC("det_remove_eps", pre, post, NULL); } #if LOG_DETERMINISE_CAPTURES || LOG_INPUT @@ -49,6 +56,7 @@ fsm_determinise(struct fsm *nfa) fsm_print_fsm(stderr, nfa); fsm_capture_dump(stderr, "#### post_remove_epsilons", nfa); #endif + TIME(&overall_pre); issp = interned_state_set_pool_alloc(nfa->opt->alloc); if (issp == NULL) { @@ -104,6 +112,17 @@ fsm_determinise(struct fsm *nfa) ac_env.fsm = nfa; ac_env.issp = issp; +#if LOG_DETERMINISATION_STATS + fprintf(stderr, "%s: determinising FSM with %d states\n", __func__, fsm_countstates(nfa)); +#endif + + INIT_TIMERS_NAMED(iss); + size_t iss_accum = 0; + size_t iss_calls = 0; + size_t stack_pushes = 0; + size_t inner_steps = 0; + + TIME(&pre); do { size_t o_i; @@ -114,18 +133,25 @@ fsm_determinise(struct fsm *nfa) assert(curr != NULL); + TIME(&iss_pre); if (!analyze_closures__pairwise_grouping(&ac_env, curr->iss)) { goto cleanup; } + TIME(&iss_post); + DIFF_MSEC("det_iss", iss_pre, iss_post, &iss_accum); + (void)iss_accum; + iss_calls++; if (!edge_set_advise_growth(&curr->edges, nfa->opt->alloc, ac_env.output_count)) { goto cleanup; } + /* each output is an outgoing (label set) -> interned_state_set pair */ for (o_i = 0; o_i < ac_env.output_count; o_i++) { struct mapping *m; struct ac_output *output = &ac_env.outputs[o_i]; interned_state_set_id iss = output->iss; + inner_steps++; #if LOG_DETERMINISE_CLOSURES fprintf(stderr, "fsm_determinise: output %zu/%zu: cur (dfa %zu) label [", @@ -157,6 +183,7 @@ fsm_determinise(struct fsm *nfa) if (!stack_push(stack, m)) { goto cleanup; } + stack_pushes++; } #if LOG_SYMBOL_CLOSURE @@ -174,6 +201,13 @@ fsm_determinise(struct fsm *nfa) /* All elements in sclosures[] 
are interned, so they will be freed later. */ } while ((curr = stack_pop(stack))); + TIME(&post); + DIFF_MSEC("det_stack_loop", pre, post, NULL); + + if (LOG_DETERMINISATION_COUNTERS) { + fprintf(stderr, "%s: iss_accum total %zu (%zu calls, %g usec avg.), %zu stack pushes, %zu iterations, %zu inner_steps\n", + __func__, iss_accum, iss_calls, iss_accum / (1.0 * iss_calls), stack_pushes, iss_calls, inner_steps); + } { struct map_iter it; @@ -185,6 +219,13 @@ fsm_determinise(struct fsm *nfa) goto cleanup; } + TIME(&pre); + if (!fsm_capture_copy_programs(nfa, dfa)) { + goto cleanup; + } + TIME(&post); + DIFF_MSEC("det_copy_captures", pre, post, NULL); + #if DUMP_MAPPING { fprintf(stderr, "#### fsm_determinise: mapping\n"); @@ -192,10 +233,10 @@ fsm_determinise(struct fsm *nfa) /* build reverse mappings table: for every NFA state X, if X is part * of the new DFA state Y, then add Y to a list for X */ for (m = map_first(&map, &it); m != NULL; m = map_next(&it)) { - struct state_iter si; interned_state_set_id iss_id = m->iss; + struct state_iter si; fsm_state_t state; - struct state_set *ss = interned_state_set_get_state_set(ac_env.issp, iss_id); + struct state_set *ss = interned_state_set_get_state_set(issp, iss_id); fprintf(stderr, "%zu:", m->dfastate); for (state_set_reset(ss, &si); state_set_next(&si, &state); ) { @@ -238,24 +279,41 @@ fsm_determinise(struct fsm *nfa) fsm_setend(dfa, m->dfastate, 1); /* - * Carry through end IDs, if present. This isn't anything to do - * with the DFA conversion; it's meaningful only to the caller. + * Copy over metadata associated with end + * states, if present. This isn't anything to do + * with the DFA conversion; it's meaningful only + * to the caller. * * The closure may contain non-end states, but at least one state is * known to have been an end state. 
*/ - if (!fsm_endid_carry(nfa, ss, dfa, m->dfastate)) { + if (!remap_end_metadata(nfa, ss, dfa, m->dfastate)) { goto cleanup; } } + TIME(&post); + DIFF_MSEC("det_map_loop", pre, post, NULL); - if (!remap_capture_actions(&map, issp, dfa, nfa)) { - goto cleanup; - } + fsm_capture_integrity_check(dfa); fsm_move(nfa, dfa); } +#if LOG_DETERMINISE_CAPTURES + fprintf(stderr, "# post_determinise\n"); + fsm_print_fsm(stderr, nfa); + fsm_capture_dump(stderr, "#### post_determinise", nfa); +#endif + + TIME(&overall_post); + DIFF_MSEC("det_overall", overall_pre, overall_post, NULL); + +#if LOG_DETERMINISATION_STATS + fprintf(stderr, "%s: created DFA with %d states\n", __func__, fsm_countstates(nfa)); + fprintf(stderr, "%s: analyze_closures_env.analyze_usec: %zu\n", + __func__, ac_env.analyze_usec); +#endif + #if EXPENSIVE_CHECKS assert(fsm_all(nfa, fsm_isdfa)); #endif @@ -311,85 +369,6 @@ fsm_determinise(struct fsm *nfa) return res; } -/* Add DFA_state to the list for NFA_state. */ -static int -add_reverse_mapping(const struct fsm_alloc *alloc, - struct reverse_mapping *reverse_mappings, - fsm_state_t dfastate, fsm_state_t nfa_state) -{ - struct reverse_mapping *rm = &reverse_mappings[nfa_state]; - if (rm->count == rm->ceil) { - const unsigned nceil = (rm->ceil ? 
2*rm->ceil : 2); - fsm_state_t *nlist = f_realloc(alloc, - rm->list, nceil * sizeof(rm->list)); - if (nlist == NULL) { - return 0; - } - rm->list = nlist; - rm->ceil = nceil; - } - - rm->list[rm->count] = dfastate; - rm->count++; - return 1; -} - -static int -det_copy_capture_actions_cb(fsm_state_t state, - enum capture_action_type type, unsigned capture_id, fsm_state_t to, - void *opaque) -{ - struct reverse_mapping *rm_s; - size_t s_i, t_i; - struct det_copy_capture_actions_env *env = opaque; - assert(env->tag == 'D'); - -#if LOG_DETERMINISE_CAPTURES - fprintf(stderr, "det_copy_capture_actions_cb: state %u, type %s, ID %u, TO %d\n", - state, fsm_capture_action_type_name[type], - capture_id, to); -#endif - - rm_s = &env->reverse_mappings[state]; - - for (s_i = 0; s_i < rm_s->count; s_i++) { - const fsm_state_t s = rm_s->list[s_i]; - - if (to == CAPTURE_NO_STATE) { - if (!fsm_capture_add_action(env->dst, - s, type, capture_id, CAPTURE_NO_STATE)) { - env->ok = 0; - return 0; - } - } else { - struct reverse_mapping *rm_t = &env->reverse_mappings[to]; - for (t_i = 0; t_i < rm_t->count; t_i++) { - const fsm_state_t t = rm_t->list[t_i]; - - if (!fsm_capture_add_action(env->dst, - s, type, capture_id, t)) { - env->ok = 0; - return 0; - } - } - } - } - - return 1; -} - -static int -det_copy_capture_actions(struct reverse_mapping *reverse_mappings, - struct fsm *dst, struct fsm *src) -{ - struct det_copy_capture_actions_env env = { 'D', NULL, NULL, 1 }; - env.dst = dst; - env.reverse_mappings = reverse_mappings; - - fsm_capture_action_iter(src, det_copy_capture_actions_cb, &env); - return env.ok; -} - SUPPRESS_EXPECTED_UNSIGNED_INTEGER_OVERFLOW() static uint64_t hash_iss(interned_state_set_id iss) @@ -636,83 +615,6 @@ stack_pop(struct mappingstack *stack) return item; } -static int -remap_capture_actions(struct map *map, struct interned_state_set_pool *issp, - struct fsm *dst_dfa, struct fsm *src_nfa) -{ - struct map_iter it; - struct state_iter si; - struct mapping *m; - 
struct reverse_mapping *reverse_mappings; - fsm_state_t state; - const size_t capture_count = fsm_countcaptures(src_nfa); - size_t i, j; - int res = 0; - - if (capture_count == 0) { - return 1; - } - - /* This is not 1 to 1 -- if state X is now represented by multiple - * states Y in the DFA, and state X has action(s) when transitioning - * to state Z, this needs to be added on every Y, for every state - * representing Z in the DFA. - * - * We could probably filter this somehow, at the very least by - * checking reachability from every X, but the actual path - * handling later will also check reachability. */ - reverse_mappings = f_calloc(dst_dfa->opt->alloc, src_nfa->statecount, sizeof(reverse_mappings[0])); - if (reverse_mappings == NULL) { - return 0; - } - - /* build reverse mappings table: for every NFA state X, if X is part - * of the new DFA state Y, then add Y to a list for X */ - for (m = map_first(map, &it); m != NULL; m = map_next(&it)) { - struct state_set *ss; - interned_state_set_id iss_id = m->iss; - assert(m->dfastate < dst_dfa->statecount); - ss = interned_state_set_get_state_set(issp, iss_id); - - for (state_set_reset(ss, &si); state_set_next(&si, &state); ) { - if (!add_reverse_mapping(dst_dfa->opt->alloc, - reverse_mappings, - m->dfastate, state)) { - goto cleanup; - } - } - } - -#if LOG_DETERMINISE_CAPTURES - fprintf(stderr, "#### reverse mapping for %zu states\n", src_nfa->statecount); - for (i = 0; i < src_nfa->statecount; i++) { - struct reverse_mapping *rm = &reverse_mappings[i]; - fprintf(stderr, "%lu:", i); - for (j = 0; j < rm->count; j++) { - fprintf(stderr, " %u", rm->list[j]); - } - fprintf(stderr, "\n"); - } -#else - (void)j; -#endif - - if (!det_copy_capture_actions(reverse_mappings, dst_dfa, src_nfa)) { - goto cleanup; - } - - res = 1; -cleanup: - for (i = 0; i < src_nfa->statecount; i++) { - if (reverse_mappings[i].list != NULL) { - f_free(dst_dfa->opt->alloc, reverse_mappings[i].list); - } - } - f_free(dst_dfa->opt->alloc, 
reverse_mappings); - - return res; -} - static int group_labels_overlap(const struct ac_group *a, const struct ac_group *b) { @@ -730,6 +632,25 @@ group_labels_overlap(const struct ac_group *a, const struct ac_group *b) return 0; } +static int +remap_end_metadata(const struct fsm *src_fsm, const struct state_set *src_set, + struct fsm *dst_fsm, fsm_state_t dst_state) +{ + if (!fsm_endid_carry(src_fsm, src_set, dst_fsm, dst_state)) { + return 0; + } + + if (!fsm_capture_copy_active_for_ends(src_fsm, src_set, dst_fsm, dst_state)) { + return 0; + } + + if (!fsm_capture_copy_program_end_state_associations(src_fsm, src_set, dst_fsm, dst_state)) { + return 0; + } + + return 1; +} + static void intersect_with(uint64_t *a, const uint64_t *b) { @@ -1339,6 +1260,7 @@ to_set_htab_check(struct analyze_closures_env *env, if (b->count == 0) { return 0; /* empty bucket -> not found */ } else if (b->count == count) { + assert(env->to_sets.buf != NULL); assert(b->offset + count <= env->to_sets.used); const fsm_state_t *ids = &env->to_sets.buf[b->offset]; if (0 == memcmp(ids, dst, count * sizeof(dst[0]))) { @@ -1465,6 +1387,7 @@ save_to_set(struct analyze_closures_env *env, env->to_sets.ceil = nceil; env->to_sets.buf = nbuf; } + assert(env->to_sets.buf != NULL); #if LOG_TO_SET static size_t to_set_id; @@ -2016,28 +1939,87 @@ static void sort_and_dedup_dst_buf(fsm_state_t *buf, size_t *used) { const size_t orig_used = *used; - qsort(buf, orig_used, sizeof(buf[0]), cmp_fsm_state_t); - - /* squash out duplicates */ - size_t rd = 1; - size_t wr = 1; - while (rd < orig_used) { - if (buf[rd - 1] == buf[rd]) { - rd++; /* skip */ - } else { - buf[wr] = buf[rd]; - rd++; - wr++; - } + + if (orig_used <= 1) { + return; /* no change */ } - *used = wr; -#if EXPENSIVE_CHECKS - assert(wr <= orig_used); - for (size_t i = 1; i < *used; i++) { - assert(buf[i - 1] < buf[i]); + /* Figure out what the min and max values are, because + * when the difference between them is not too large it + * can be 
significantly faster to avoid qsort here. */ + fsm_state_t min = (fsm_state_t)-1; + fsm_state_t max = 0; + for (size_t i = 0; i < orig_used; i++) { + const fsm_state_t cur = buf[i]; + if (cur < min) { min = cur; } + if (cur > max) { max = cur; } } + + /* If there's only one unique value, then we're done. */ + if (min == max) { + buf[0] = min; + *used = 1; + return; + } + +/* 81920 = 10 KB buffer on the stack. This must be divisible by 64. + * Set to 0 to disable. */ +#define QSORT_CUTOFF 81920 + + if (QSORT_CUTOFF == 0 || max - min > QSORT_CUTOFF) { + /* If the bitset would be very large but sparse due to + * extreme values, then fall back on using qsort and + * then sweeping over the array to squash out + * duplicates. */ + qsort(buf, orig_used, sizeof(buf[0]), cmp_fsm_state_t); + + /* squash out duplicates */ + size_t rd = 1; + size_t wr = 1; + while (rd < orig_used) { + if (buf[rd - 1] == buf[rd]) { + rd++; /* skip */ + } else { + buf[wr] = buf[rd]; + rd++; + wr++; + } + } + + *used = wr; +#if EXPENSIVE_CHECKS + assert(wr <= orig_used); + for (size_t i = 1; i < *used; i++) { + assert(buf[i - 1] < buf[i]); + } #endif + } else { + /* Convert the array into a bitset and back, which sorts + * and deduplicates in the process. Add 1 to avoid a zero- + * zero-length array error if QSORT_CUTOFF is 0. 
*/ + uint64_t bitset[QSORT_CUTOFF/64 + 1]; + const size_t words = u64bitset_words(max - min + 1); + memset(bitset, 0x00, words * sizeof(bitset[0])); + + for (size_t i = 0; i < orig_used; i++) { + u64bitset_set(bitset, buf[i] - min); + } + + size_t dst = 0; + for (size_t i = 0; i < words; i++) { + const uint64_t w = bitset[i]; + if (w != 0) { /* skip empty words */ + uint64_t bit = 0x1; + for (size_t b_i = 0; b_i < 64; b_i++, bit <<= 1) { + if (w & bit) { + buf[dst] = 64*i + b_i + min; + dst++; + } + } + } + } + *used = dst; + } } static int diff --git a/src/libfsm/determinise_internal.h b/src/libfsm/determinise_internal.h index 8fe35fcd8..856f8baf8 100644 --- a/src/libfsm/determinise_internal.h +++ b/src/libfsm/determinise_internal.h @@ -75,19 +75,6 @@ struct map_iter { size_t i; }; -struct reverse_mapping { - unsigned count; - unsigned ceil; - fsm_state_t *list; -}; - -struct det_copy_capture_actions_env { - char tag; - struct fsm *dst; - struct reverse_mapping *reverse_mappings; - int ok; -}; - #define MAPPINGSTACK_DEF_CEIL 16 struct mappingstack { const struct fsm_alloc *alloc; @@ -289,6 +276,10 @@ analyze_closures__grow_dst(struct analyze_closures_env *env); static int analyze_closures__grow_outputs(struct analyze_closures_env *env); +static int +remap_end_metadata(const struct fsm *src_fsm, const struct state_set *src_set, + struct fsm *dst_fsm, fsm_state_t dst_state); + static int map_add(struct map *map, fsm_state_t dfastate, interned_state_set_id iss, struct mapping **new_mapping); @@ -306,22 +297,9 @@ map_first(struct map *map, struct map_iter *iter); static struct mapping * map_next(struct map_iter *iter); -static int -add_reverse_mapping(const struct fsm_alloc *alloc, - struct reverse_mapping *reverse_mappings, - fsm_state_t dfastate, fsm_state_t nfa_state); - -static int -det_copy_capture_actions(struct reverse_mapping *reverse_mappings, - struct fsm *dst, struct fsm *src); - static int grow_map(struct map *map); -static int 
-remap_capture_actions(struct map *map, struct interned_state_set_pool *issp, - struct fsm *dst_dfa, struct fsm *src_nfa); - static struct mappingstack * stack_init(const struct fsm_alloc *alloc); diff --git a/src/libfsm/endids.c b/src/libfsm/endids.c index 444ccbc2e..8da8e3371 100644 --- a/src/libfsm/endids.c +++ b/src/libfsm/endids.c @@ -4,10 +4,47 @@ * See LICENCE for the full copyright terms. */ +#include +#include + #include #include -#include "endids_internal.h" +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include "internal.h" +#include "endids.h" + +#define BUCKET_NO_STATE ((fsm_state_t)-1) +#define DEF_BUCKET_COUNT 4 +#define DEF_BUCKET_ID_COUNT 16 + +struct endid_info { + /* Add-only hash table, with a state ID and an associated + * non-empty ordered array of unique end IDs. The state is the + * key. Grows when the buckets are more than half full. */ + unsigned bucket_count; + unsigned buckets_used; + + struct endid_info_bucket { + fsm_state_t state; + struct end_info_ids { + unsigned count; + unsigned ceil; + fsm_end_id_t ids[1]; + } *ids; + } *buckets; +}; #define LOG_ENDIDS 0 @@ -84,6 +121,14 @@ fsm_setendid(struct fsm *fsm, fsm_end_id_t id) return 1; } +int +fsm_setendid_state(struct fsm *fsm, fsm_state_t s, fsm_end_id_t id) +{ + enum fsm_endid_set_res sres; + sres = fsm_endid_set(fsm, s, id); + return sres != FSM_ENDID_SET_ERROR_ALLOC_FAIL; +} + enum fsm_getendids_res fsm_getendids(const struct fsm *fsm, fsm_state_t end_state, size_t id_buf_count, fsm_end_id_t *id_buf, @@ -778,13 +823,16 @@ struct carry_env { }; static int -carry_iter_cb(fsm_state_t state, fsm_end_id_t id, void *opaque) +carry_iter_cb(const struct fsm *fsm, fsm_state_t state, + size_t nth, fsm_end_id_t id, void *opaque) { enum fsm_endid_set_res sres; struct carry_env *env = opaque; assert(env->tag == 'C'); + (void)fsm; (void)state; + (void)nth; sres = fsm_endid_set(env->dst, env->dst_state, id); if (sres == 
FSM_ENDID_SET_ERROR_ALLOC_FAIL) { @@ -839,6 +887,72 @@ fsm_endid_carry(const struct fsm *src_fsm, const struct state_set *src_set, return 1; } +/* Make a new hash table, copying over converted entries and/or discarding. */ +int +fsm_endid_compact(struct fsm *fsm, + const fsm_state_t *mapping, size_t mapping_count) +{ + struct endid_info *info = fsm->endid_info; + const size_t ocount = info->bucket_count; + const size_t ncount = ocount; /* does not need to grow */ + struct endid_info_bucket *obuckets = info->buckets; + struct endid_info_bucket *nbuckets = f_malloc(fsm->opt->alloc, + ncount * sizeof(nbuckets[0])); + const size_t nmask = ncount - 1; + size_t ob_i, nb_i; + size_t moved = 0; + +#if LOG_ENDIDS > 3 + fprintf(stderr, "fsm_endid_compact: rehashing mapped entries\n"); +#endif + + if (nbuckets == NULL) { + return 0; + } + + for (nb_i = 0; nb_i < ncount; nb_i++) { /* clear table */ + nbuckets[nb_i].state = BUCKET_NO_STATE; + } + + for (ob_i = 0; ob_i < ocount; ob_i++) { + const struct endid_info_bucket *ob = &obuckets[ob_i]; + uint64_t hash; + fsm_state_t nstate; + + if (ob->state == BUCKET_NO_STATE) { + continue; + } + + assert(ob->state < mapping_count); + nstate = mapping[ob->state]; + + if (nstate == FSM_STATE_REMAP_NO_STATE) { + info->buckets_used--; /* discarded */ + continue; + } + + hash = hash_id(nstate); + for (nb_i = 0; nb_i < ncount; nb_i++) { + struct endid_info_bucket *nb = &nbuckets[(hash + nb_i) & nmask]; + if (nb->state == BUCKET_NO_STATE) { + nb->state = nstate; + nb->ids = ob->ids; + moved++; + break; + } else { + continue; /* collision */ + } + } + } + + assert(moved == info->buckets_used); + + f_free(fsm->opt->alloc, info->buckets); + info->bucket_count = ncount; + info->buckets = nbuckets; + return 1; +} + void fsm_endid_iter(const struct fsm *fsm, fsm_endid_iter_cb *cb, void *opaque) @@ -867,7 +981,7 @@ fsm_endid_iter(const struct fsm *fsm, count = b->ids->count; for (id_i = 0; id_i < count; id_i++) { - if (!cb(b->state, 
b->ids->ids[id_i], opaque)) { + if (!cb(fsm, b->state, id_i, b->ids->ids[id_i], opaque)) { break; } @@ -969,7 +1083,7 @@ fsm_endid_iter_state(const struct fsm *fsm, fsm_state_t state, fprintf(stderr, "fsm_endid_iter_state[%d], ids[%ld] -> %d\n", b->state, id_i, b->ids->ids[id_i]); #endif - if (!cb(b->state, b->ids->ids[id_i], opaque)) { + if (!cb(fsm, b->state, id_i, b->ids->ids[id_i], opaque)) { return; } id_i++; @@ -991,10 +1105,13 @@ struct dump_env { }; static int -dump_cb(fsm_state_t state, const fsm_end_id_t id, void *opaque) +dump_cb(const struct fsm *fsm, fsm_state_t state, + size_t nth, const fsm_end_id_t id, void *opaque) { struct dump_env *env = opaque; - fprintf(env->f, "state[%u]: %u\n", state, id); + fprintf(env->f, "endids: state[%u]: %u\n", state, id); + (void)fsm; + (void)nth; return 1; } diff --git a/src/libfsm/endids.h b/src/libfsm/endids.h index 6c46567b3..c43e3ffdf 100644 --- a/src/libfsm/endids.h +++ b/src/libfsm/endids.h @@ -58,10 +58,15 @@ int fsm_endid_carry(const struct fsm *src_fsm, const struct state_set *src_set, struct fsm *dst_fsm, fsm_state_t dst_state); +int +fsm_endid_compact(struct fsm *fsm, + const fsm_state_t *mapping, size_t mapping_count); + /* Callback when iterating over the endids. * Return 0 to halt, or non-zero to continue. 
*/ typedef int -fsm_endid_iter_cb(fsm_state_t state, const fsm_end_id_t id, void *opaque); +fsm_endid_iter_cb(const struct fsm *fsm, fsm_state_t state, + size_t nth, const fsm_end_id_t id, void *opaque); void fsm_endid_iter(const struct fsm *fsm, diff --git a/src/libfsm/endids_internal.h b/src/libfsm/endids_internal.h deleted file mode 100644 index 27450af3b..000000000 --- a/src/libfsm/endids_internal.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef ENDIDS_INTERNAL_H -#define ENDIDS_INTERNAL_H - -#include -#include - -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include "internal.h" -#include "endids.h" - -#define BUCKET_NO_STATE ((fsm_state_t)-1) -#define DEF_BUCKET_COUNT 4 -#define DEF_BUCKET_ID_COUNT 16 - -struct endid_info { - /* Add-only hash table, with a state ID and an associated - * non-empty ordered array of unique end IDs. The state is the - * key. Grows when the buckets are more than half full. */ - unsigned bucket_count; - unsigned buckets_used; - - struct endid_info_bucket { - fsm_state_t state; - struct end_info_ids { - unsigned count; - unsigned ceil; - fsm_end_id_t ids[1]; - } *ids; - } *buckets; -}; - -#endif diff --git a/src/libfsm/epsilons.c b/src/libfsm/epsilons.c index e87d9d974..52b73db6c 100644 --- a/src/libfsm/epsilons.c +++ b/src/libfsm/epsilons.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -21,52 +22,79 @@ #include "endids.h" #define DUMP_EPSILON_CLOSURES 0 -#define DEF_PENDING_CAPTURE_ACTIONS_CEIL 2 #define LOG_RM_EPSILONS_CAPTURES 0 -#define DEF_CARRY_ENDIDS_COUNT 2 +#define LOG_COPYING 0 +#define LOG_RESULT 0 -struct remap_env { - char tag; - const struct fsm_alloc *alloc; - struct state_set **rmap; - int ok; - - size_t count; - size_t ceil; - struct remap_action { - fsm_state_t state; - enum capture_action_type type; - unsigned capture_id; - fsm_state_t to; - } *actions; -}; +/* #define DEF_CARRY_ENDIDS_COUNT 2 */ +/* #define DEF_CARRY_CAPTUREIDS_COUNT 2 */ 
-static int -remap_capture_actions(struct fsm *nfa, struct state_set **eclosures); +#if LOG_RESULT +#include +#endif -static int -remap_capture_action_cb(fsm_state_t state, - enum capture_action_type type, unsigned capture_id, fsm_state_t to, - void *opaque); +#define DEF_END_METADATA_ENDIDS_CEIL 4 +#define DEF_END_METADATA_CAPTUREIDS_CEIL 4 +#define DEF_END_METADATA_PROGRAMIDS_CEIL 4 +struct carry_end_metadata_env { + struct fsm *fsm; + const struct fsm_alloc *alloc; + + struct { + size_t ceil; + fsm_end_id_t *ids; + } end; + struct { + int ok; + size_t count; + size_t ceil; + unsigned *ids; + } capture; + struct { + int ok; + size_t count; + size_t ceil; + uint32_t *ids; + } program; +}; static int -carry_endids(struct fsm *fsm, struct state_set *states, - fsm_state_t s); +carry_end_metadata(struct carry_end_metadata_env *env, + fsm_state_t end_state, fsm_state_t dst_state); int fsm_remove_epsilons(struct fsm *nfa) { +#if LOG_RESULT + fprintf(stderr, "==== before\n"); + fsm_print_fsm(stderr, nfa); + fsm_capture_dump(stderr, "#### before_remove_epsilons", nfa); + fprintf(stderr, "====\n"); +#endif + const size_t state_count = fsm_countstates(nfa); int res = 0; struct state_set **eclosures = NULL; - fsm_state_t s; + fsm_state_t s, start_id; + const struct fsm_alloc *alloc = nfa->opt->alloc; INIT_TIMERS(); + struct carry_end_metadata_env em_env = { 0 }; + em_env.fsm = nfa; + em_env.alloc = alloc; + assert(nfa != NULL); + if (!fsm_getstart(nfa, &start_id)) { + goto cleanup; + } + + /* TODO: This could successfully exit early if none of the + * states have epsilon edges. */ + TIME(&pre); - eclosures = epsilon_closure(nfa); + eclosures = fsm_epsilon_closure(nfa); TIME(&post); DIFF_MSEC("epsilon_closure", pre, post, NULL); @@ -107,20 +135,14 @@ fsm_remove_epsilons(struct fsm *nfa) * end states. * * Similarly, any end state metadata on states - * in its epsilon-closure is copied to it. - * - * Capture actions are copied in a later pass. 
*/ + * in its epsilon-closure is copied to it. */ if (fsm_isend(nfa, es_id)) { #if LOG_COPYING fprintf(stderr, "remove_epsilons: setting end on %d (due to %d)\n", s, es_id); #endif fsm_setend(nfa, s, 1); - /* - * Carry through end IDs, if present. This isn't anything to do - * with the NFA conversion; it's meaningful only to the caller. - */ - if (!carry_endids(nfa, eclosures[s], s)) { + if (!carry_end_metadata(&em_env, es_id, s)) { goto cleanup; } } @@ -150,14 +172,7 @@ fsm_remove_epsilons(struct fsm *nfa) state->epsilons = NULL; } -#if LOG_RESULT - fprintf(stderr, "=== %s: about to update capture actions\n", __func__); - fsm_print_fsm(stderr, nfa); -#endif - - if (!remap_capture_actions(nfa, eclosures)) { - goto cleanup; - } + fsm_capture_integrity_check(nfa); #if LOG_RESULT fsm_print_fsm(stderr, nfa); @@ -167,255 +182,144 @@ fsm_remove_epsilons(struct fsm *nfa) res = 1; cleanup: if (eclosures != NULL) { - closure_free(eclosures, state_count); - } - - return res; -} - -static int -remap_capture_actions(struct fsm *nfa, struct state_set **eclosures) -{ - int res = 0; - fsm_state_t s, i; - struct state_set **rmap; - struct state_iter si; - fsm_state_t si_s; - struct remap_env env = { 'R', NULL, NULL, 1, 0, 0, NULL }; - env.alloc = nfa->opt->alloc; - - /* build a reverse mapping */ - rmap = f_calloc(nfa->opt->alloc, nfa->statecount, sizeof(rmap[0])); - if (rmap == NULL) { - goto cleanup; + fsm_closure_free(eclosures, state_count); } - - for (s = 0; s < nfa->statecount; s++) { - if (eclosures[s] == NULL) { continue; } - for (state_set_reset(eclosures[s], &si); state_set_next(&si, &si_s); ) { - if (si_s == s) { - continue; /* ignore identical states */ - } -#if LOG_RM_EPSILONS_CAPTURES - fprintf(stderr, "remap_capture_actions: %u <- %u\n", - s, si_s); -#endif - if (!state_set_add(&rmap[si_s], nfa->opt->alloc, s)) { - goto cleanup; - } - } + if (em_env.end.ids != NULL) { + f_free(alloc, em_env.end.ids); } - env.rmap = rmap; - - /* Iterate over the current set of 
actions with the reverse - * mapping (containing only states which will be skipped, - * collecting info about every new capture action that will need - * to be added. - * - * It can't be added during the iteration, because that would - * modify the hash table as it's being iterated over. */ - fsm_capture_action_iter(nfa, remap_capture_action_cb, &env); - - /* Now that we're done iterating, add those actions. */ - for (i = 0; i < env.count; i++) { - const struct remap_action *a = &env.actions[i]; - if (!fsm_capture_add_action(nfa, a->state, a->type, - a->capture_id, a->to)) { - goto cleanup; - } + if (em_env.program.ids != NULL) { + f_free(alloc, em_env.program.ids); } - - res = 1; - -cleanup: - if (env.actions != NULL) { - f_free(nfa->opt->alloc, env.actions); + if (em_env.capture.ids != NULL) { + f_free(alloc, em_env.capture.ids); } - if (rmap != NULL) { - for (i = 0; i < nfa->statecount; i++) { - state_set_free(rmap[i]); - } - f_free(nfa->opt->alloc, rmap); - } return res; - } static int -add_pending_capture_action(struct remap_env *env, - fsm_state_t state, enum capture_action_type type, - unsigned capture_id, fsm_state_t to) +collect_captureid_cb(fsm_state_t state, unsigned id, void *opaque) { - struct remap_action *a; - if (env->count == env->ceil) { - struct remap_action *nactions; - const size_t nceil = (env->ceil == 0 - ? DEF_PENDING_CAPTURE_ACTIONS_CEIL : 2*env->ceil); - assert(nceil > 0); - nactions = f_realloc(env->alloc, - env->actions, - nceil * sizeof(nactions[0])); - if (nactions == NULL) { - return 0; - } - - env->ceil = nceil; - env->actions = nactions; - } + struct carry_end_metadata_env *env = opaque; + (void)state; - a = &env->actions[env->count]; -#if LOG_RM_EPSILONS_CAPTURES - fprintf(stderr, "add_pending_capture_action: state %d, type %s, capture_id %u, to %d\n", - state, fsm_capture_action_type_name[type], capture_id, to); -#endif + if (env->capture.count == env->capture.ceil) { + const size_t nceil = (env->capture.ceil == 0) + ? 
DEF_END_METADATA_CAPTUREIDS_CEIL + : 2 * env->capture.ceil; + unsigned *nids; + assert(nceil > env->capture.ceil); + nids = f_realloc(env->alloc, env->capture.ids, + nceil * sizeof(env->capture.ids[0])); + if (nids == NULL) { + env->capture.ok = 0; + return 0; + } + env->capture.ceil = nceil; + env->capture.ids = nids; + } - a->state = state; - a->type = type; - a->capture_id = capture_id; - a->to = to; - env->count++; + env->capture.ids[env->capture.count] = id; + env->capture.count++; return 1; } static int -remap_capture_action_cb(fsm_state_t state, - enum capture_action_type type, unsigned capture_id, fsm_state_t to, - void *opaque) +collect_progid_cb(fsm_state_t state, unsigned id, void *opaque) { - struct state_iter si; - fsm_state_t si_s; - struct remap_env *env = opaque; - assert(env->tag == 'R'); - -#if LOG_RM_EPSILONS_CAPTURES - fprintf(stderr, "remap_capture_action_cb: state %d, type %s, capture_id %u, to %d\n", - state, fsm_capture_action_type_name[type], capture_id, to); -#endif - - for (state_set_reset(env->rmap[state], &si); state_set_next(&si, &si_s); ) { - struct state_iter si_to; - fsm_state_t si_tos; - -#if LOG_RM_EPSILONS_CAPTURES - fprintf(stderr, " -- rcac: state %d -> %d\n", state, si_s); -#endif - - if (!add_pending_capture_action(env, si_s, type, capture_id, to)) { - goto fail; - } - - if (to == CAPTURE_NO_STATE) { - continue; - } - - for (state_set_reset(env->rmap[to], &si_to); state_set_next(&si, &si_tos); ) { -#if LOG_RM_EPSILONS_CAPTURES - fprintf(stderr, " -- rcac: to %d -> %d\n", to, si_tos); -#endif - - if (!add_pending_capture_action(env, si_tos, type, capture_id, to)) { - goto fail; - } - - } - } + struct carry_end_metadata_env *env = opaque; + uint32_t prog_id = (uint32_t)id; + (void)state; + + if (env->program.count == env->program.ceil) { + const size_t nceil = (env->program.ceil == 0) + ? 
DEF_END_METADATA_PROGRAMIDS_CEIL + : 2 * env->program.ceil; + unsigned *nids; + assert(nceil > env->program.ceil); + nids = f_realloc(env->alloc, env->program.ids, + nceil * sizeof(env->program.ids[0])); + if (nids == NULL) { + env->program.ok = 0; + return 0; + } + env->program.ceil = nceil; + env->program.ids = nids; + } + env->program.ids[env->program.count] = prog_id; + env->program.count++; return 1; - -fail: - env->ok = 0; - return 0; } -struct collect_env { - char tag; - const struct fsm_alloc *alloc; - size_t count; - size_t ceil; - fsm_end_id_t *ids; - int ok; -}; - +/* Because we're modifying the FSM in place, we can't iterate and add + * new entries -- it could lead to the underlying hash table resizing. + * Instead, collect, then add in a second pass. */ static int -collect_cb(fsm_state_t state, fsm_end_id_t id, void *opaque) +carry_end_metadata(struct carry_end_metadata_env *env, + fsm_state_t end_state, fsm_state_t dst_state) { - struct collect_env *env = opaque; - assert(env->tag == 'E'); - - (void)state; - - if (env->count == env->ceil) { - const size_t nceil = 2 * env->ceil; - fsm_end_id_t *nids; - assert(nceil > env->ceil); - nids = f_realloc(env->alloc, env->ids, - nceil * sizeof(*env->ids)); - if (nids == NULL) { - env->ok = 0; - return 0; + size_t i; + const size_t id_count = fsm_getendidcount(env->fsm, end_state); + if (id_count > 0) { /* copy end IDs */ + enum fsm_getendids_res id_res; + size_t written; + if (id_count > env->end.ceil) { /* grow buffer */ + size_t nceil = (env->end.ceil == 0) + ? 
DEF_END_METADATA_ENDIDS_CEIL + : 2*env->end.ceil; + while (nceil < id_count) { + nceil *= 2; + } + assert(nceil > 0); + fsm_end_id_t *nids = f_realloc(env->alloc, + env->end.ids, nceil * sizeof(env->end.ids[0])); + if (nids == NULL) { + return 0; + } + env->end.ids = nids; + env->end.ceil = nceil; } - env->ceil = nceil; - env->ids = nids; - } - env->ids[env->count] = id; - env->count++; + id_res = fsm_getendids(env->fsm, end_state, + id_count, env->end.ids, &written); + assert(id_res == FSM_GETENDIDS_FOUND); + assert(written == id_count); - return 1; -} - -/* fsm_remove_epsilons can't use fsm_endid_carry directly, because the src - * and dst FSMs are the same -- that would lead to adding entries to a - * hash table, possibly causing it to resize, while iterating over it. - * - * Instead, collect entries that need to be added (if not already - * present), and then add them in a second pass. */ -static int -carry_endids(struct fsm *fsm, struct state_set *states, - fsm_state_t dst_state) -{ - struct state_iter it; - fsm_state_t s; - size_t i; + for (i = 0; i < id_count; i++) { +#if LOG_COPYING + fprintf(stderr, "carry_end_metadata: setting end ID %u on %d (due to %d)\n", + env->end.ids[i], dst_state, end_state); +#endif + if (!fsm_setendid_state(env->fsm, dst_state, env->end.ids[i])) { + return 0; + } + } + } - struct collect_env env; - env.tag = 'E'; /* for fsm_remove_epsilons */ - env.alloc = fsm->opt->alloc; - env.count = 0; - env.ceil = DEF_CARRY_ENDIDS_COUNT; - env.ids = f_malloc(fsm->opt->alloc, - env.ceil * sizeof(*env.ids)); - if (env.ids == NULL) { + env->capture.ok = 1; + env->capture.count = 0; + fsm_capture_iter_active_for_end_state(env->fsm, end_state, + collect_captureid_cb, env); + if (!env->capture.ok) { return 0; } - env.ok = 1; - - /* collect from states */ - for (state_set_reset(states, &it); state_set_next(&it, &s); ) { - if (!fsm_isend(fsm, s)) { - continue; - } - - fsm_endid_iter_state(fsm, s, collect_cb, &env); - if (!env.ok) { - goto cleanup; + 
for (i = 0; i < env->capture.count; i++) { + if (!fsm_capture_set_active_for_end(env->fsm, + env->capture.ids[i], dst_state)) { + return 0; } } - /* add them */ - for (i = 0; i < env.count; i++) { - enum fsm_endid_set_res sres; - sres = fsm_endid_set(fsm, dst_state, env.ids[i]); - if (sres == FSM_ENDID_SET_ERROR_ALLOC_FAIL) { - env.ok = 0; - goto cleanup; + env->program.count = 0; + fsm_capture_iter_program_ids_for_end_state(env->fsm, end_state, + collect_progid_cb, env); + for (i = 0; i < env->program.count; i++) { + if (!fsm_capture_associate_program_with_end_state(env->fsm, + env->program.ids[i], dst_state)) { + return 0; } } -cleanup: - f_free(fsm->opt->alloc, env.ids); - - return env.ok; + return 1; } - diff --git a/src/libfsm/exec.c b/src/libfsm/exec.c index 9f7b21802..47d27a50e 100644 --- a/src/libfsm/exec.c +++ b/src/libfsm/exec.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -25,7 +26,6 @@ static int transition(const struct fsm *fsm, fsm_state_t state, int c, - size_t offset, struct fsm_capture *captures, fsm_state_t *next) { assert(state < fsm->statecount); @@ -35,31 +35,72 @@ transition(const struct fsm *fsm, fsm_state_t state, int c, return 0; } - if (captures != NULL && fsm_capture_has_capture_actions(fsm, state)) { - fsm_capture_update_captures(fsm, state, *next, - offset, captures); - } - return 1; } int fsm_exec(const struct fsm *fsm, - int (*fsm_getc)(void *opaque), void *opaque, - fsm_state_t *end, struct fsm_capture *captures) + int (*fsm_getc)(void *opaque), void *opaque, fsm_state_t *end) { fsm_state_t state; int c; size_t offset = 0; - unsigned i; - size_t capture_count; assert(fsm != NULL); assert(fsm_getc != NULL); assert(end != NULL); - capture_count = fsm_countcaptures(fsm); + /* TODO: check prerequisites; that it has literal edges, DFA, etc */ + + /* TODO: pass struct of callbacks to call during each event; transitions etc */ + + if (!fsm_all(fsm, fsm_isdfa)) { + errno = EINVAL; + return -1; + } + + if 
(!fsm_getstart(fsm, &state)) { + errno = EINVAL; + return -1; + } + +#if LOG_EXEC + fprintf(stderr, "fsm_exec: starting at %d\n", state); +#endif + + while (c = fsm_getc(opaque), c != EOF) { + if (!transition(fsm, state, c, &state)) { +#if LOG_EXEC + fprintf(stderr, "fsm_exec: edge not found\n"); +#endif + return 0; + } +#if LOG_EXEC + fprintf(stderr, "fsm_exec: @ %zu, input '%c', new state %u\n", + offset, c, state); +#endif + offset++; + } + + if (!fsm_isend(fsm, state)) { + return 0; + } + + *end = state; + return 1; +} + +int +fsm_exec_with_captures(const struct fsm *fsm, const unsigned char *input, + size_t input_length, fsm_state_t *end, + struct fsm_capture *captures, size_t capture_buf_length) +{ + fsm_state_t state; + size_t offset = 0; + + assert(fsm != NULL); + assert(end != NULL); /* TODO: check prerequisites; that it has literal edges, DFA, etc */ /* TODO: pass struct of callbacks to call during each event; transitions etc */ @@ -74,17 +115,26 @@ fsm_exec(const struct fsm *fsm, return -1; } - for (i = 0; i < capture_count; i++) { - captures[i].pos[0] = FSM_CAPTURE_NO_POS; - captures[i].pos[1] = FSM_CAPTURE_NO_POS; + if (captures != NULL) { + const size_t capture_ceil = fsm_capture_ceiling(fsm); + if (capture_buf_length < capture_ceil) { + errno = EINVAL; + return -1; + } + + for (size_t i = 0; i < capture_ceil; i++) { + captures[i].pos[0] = FSM_CAPTURE_NO_POS; + captures[i].pos[1] = FSM_CAPTURE_NO_POS; + } } #if LOG_EXEC fprintf(stderr, "fsm_exec: starting at %d\n", state); #endif - while (c = fsm_getc(opaque), c != EOF) { - if (!transition(fsm, state, c, offset, captures, &state)) { + while (offset < input_length) { + const unsigned char c = input[offset]; + if (!transition(fsm, state, c, &state)) { #if LOG_EXEC fprintf(stderr, "fsm_exec: edge not found\n"); #endif @@ -102,15 +152,15 @@ fsm_exec(const struct fsm *fsm, return 0; } - /* Check for capture actions on end state */ - if (captures != NULL && fsm_capture_has_capture_actions(fsm, state)) { - 
fsm_capture_update_captures(fsm, state, NEXT_STATE_END, - offset, captures); + /* Resolve captures associated with the end state. */ + if (captures != NULL) { + if (!fsm_capture_resolve_during_exec(fsm, state, + input, offset, captures, capture_buf_length)) { + assert(errno != 0); + return -1; + } } - fsm_capture_finalize_captures(fsm, capture_count, captures); - *end = state; return 1; } - diff --git a/src/libfsm/internal.h b/src/libfsm/internal.h index 6e77510a7..5d372e30f 100644 --- a/src/libfsm/internal.h +++ b/src/libfsm/internal.h @@ -52,10 +52,6 @@ struct fsm_edge { struct fsm_state { unsigned int end:1; - /* If 0, then this state has no need for checking - * the fsm->capture_info struct. */ - unsigned int has_capture_actions:1; - /* meaningful within one particular transformation only */ unsigned int visited:1; @@ -90,10 +86,10 @@ state_hasnondeterminism(const struct fsm *fsm, fsm_state_t state, struct bm *bm) * for states, with wrapper to populate malloced array of user-facing structs. 
*/ struct state_set ** -epsilon_closure(struct fsm *fsm); +fsm_epsilon_closure(struct fsm *fsm); void -closure_free(struct state_set **closures, size_t n); +fsm_closure_free(struct state_set **closures, size_t n); /* * Internal free function that invokes free(3) by default, or a user-provided diff --git a/src/libfsm/libfsm.syms b/src/libfsm/libfsm.syms index 415bffbea..a8a0976ca 100644 --- a/src/libfsm/libfsm.syms +++ b/src/libfsm/libfsm.syms @@ -108,6 +108,7 @@ fsm_shortest fsm_example fsm_exec +fsm_exec_with_captures # exec fsm_fgetc @@ -117,8 +118,8 @@ fsm_print_cfrag # XXX: workaround for lx make_ir # XXX: workaround for lx free_ir # XXX: workaround for lx -epsilon_closure # XXX: workaround for fsm -closure_free # XXX: workaround for fsm +fsm_epsilon_closure # XXX: workaround for fsm +fsm_closure_free # XXX: workaround for fsm fsm_mergeab @@ -129,11 +130,19 @@ fsm_vm_match_buffer fsm_vm_match_file # -fsm_countcaptures -fsm_capture_has_capture_actions +fsm_capture_ceiling fsm_capture_set_path fsm_capture_rebase_capture_id -fsm_capture_alloc +fsm_capture_alloc_capture_buffer +fsm_capture_free_capture_buffer fsm_capture_dump +fsm_capture_set_active_for_end +fsm_capture_add_program +fsm_capture_associate_program_with_end_state + +# capture_vm* +fsm_capvm_program_free +fsm_capvm_program_dump +fsm_capvm_program_exec fsm_minimise_test_oracle diff --git a/src/libfsm/merge.c b/src/libfsm/merge.c index fd4d35ee5..7abe8cf25 100644 --- a/src/libfsm/merge.c +++ b/src/libfsm/merge.c @@ -17,25 +17,27 @@ #include #include #include +#include #include "capture.h" +#include "capture_vm.h" #include "internal.h" #include "endids.h" #define LOG_MERGE_ENDIDS 0 - -struct copy_capture_env { - char tag; - struct fsm *dst; - int ok; -}; +#define LOG_COPY_CAPTURE_PROGRAMS 0 static int -copy_capture_actions(struct fsm *dst, struct fsm *src); +copy_end_metadata(struct fsm *dst, struct fsm *src, + fsm_state_t base_src, unsigned capture_base_src); static int copy_end_ids(struct fsm *dst, 
struct fsm *src, fsm_state_t base_src); +static int +copy_active_capture_ids(struct fsm *dst, struct fsm *src, + fsm_state_t base_src, unsigned capture_base_src); + static struct fsm * merge(struct fsm *dst, struct fsm *src, fsm_state_t *base_dst, fsm_state_t *base_src, @@ -72,17 +74,12 @@ merge(struct fsm *dst, struct fsm *src, *base_dst = 0; *base_src = dst->statecount; *capture_base_dst = 0; - *capture_base_src = fsm_countcaptures(dst); + *capture_base_src = fsm_capture_ceiling(dst); for (i = 0; i < src->statecount; i++) { state_set_rebase(&src->states[i].epsilons, *base_src); edge_set_rebase(&src->states[i].edges, *base_src); } - - /* FIXME: instead of rebasing these here, they could - * also be updated in copy_capture_actions below. */ - fsm_capture_rebase_capture_id(src, *capture_base_src); - fsm_capture_rebase_capture_action_states(src, *base_src); } memcpy(dst->states + dst->statecount, src->states, @@ -90,22 +87,10 @@ merge(struct fsm *dst, struct fsm *src, dst->statecount += src->statecount; dst->endcount += src->endcount; - /* We need to explicitly copy over the capture actions and end - * ID info here because they're stored on the FSMs as a whole, - * rather than individual states; `memcpy`ing the states alone - * won't transfer them. - * - * They're stored separately because they are likely to only - * be on a small portion of the states, and adding two extra - * NULL pointers to `struct fsm_state` increases memory usage - * significantly. */ - - if (!copy_capture_actions(dst, src)) { - /* non-recoverable -- destructive operation */ - return NULL; - } - - if (!copy_end_ids(dst, src, *base_src)) { + /* We need to explicitly copy over end metadata here. They're + * stored separately because they are likely to only be on a + * small portion of the states. 
*/ + if (!copy_end_metadata(dst, src, *base_src, *capture_base_src)) { /* non-recoverable -- destructive operation */ return NULL; } @@ -123,16 +108,91 @@ merge(struct fsm *dst, struct fsm *src, return dst; } +struct copy_capture_programs_env { + const struct fsm_alloc *alloc; + const struct fsm *src; + struct fsm *dst; + int ok; + fsm_state_t state_base_src; + unsigned capture_base_src; + +#define DEF_MAPPING_CEIL 1 + size_t mapping_used; + size_t mapping_ceil; + /* TODO: could cache last_map to check first if this becomes expensive */ + struct prog_mapping { + unsigned src_prog_id; + unsigned dst_prog_id; + } *mappings; +}; + static int -copy_capture_cb(fsm_state_t state, - enum capture_action_type type, unsigned capture_id, fsm_state_t to, +copy_capture_programs_cb(fsm_state_t src_state, unsigned src_prog_id, void *opaque) { - struct copy_capture_env *env = opaque; - assert(env->tag == 'C'); + struct copy_capture_programs_env *env = opaque; + + const fsm_state_t dst_state = src_state + env->state_base_src; + assert(dst_state < fsm_countstates(env->dst)); + +#if LOG_COPY_CAPTURE_PROGRAMS + fprintf(stderr, "%s: src %p, dst %p, src_prog_id %u, src_state %d, dst_state %d, capture_base_src %u\n", + __func__, (void *)env->src, (void *)env->dst, + src_prog_id, src_state, dst_state, env->capture_base_src); +#endif + int found = 0; + uint32_t dst_prog_id; + + for (size_t i = 0; i < env->mapping_used; i++) { + const struct prog_mapping *m = &env->mappings[i]; + if (m->src_prog_id == src_prog_id) { + dst_prog_id = m->dst_prog_id; + found = 1; + } + } + + if (!found) { + if (env->mapping_used == env->mapping_ceil) { /* grow */ + const size_t nceil = 2*env->mapping_ceil; + struct prog_mapping *nmappings = f_realloc(env->alloc, + env->mappings, nceil * sizeof(nmappings[0])); + if (nmappings == NULL) { + env->ok = 0; + return 0; + } + + env->mapping_ceil = nceil; + env->mappings = nmappings; + } + + const struct capvm_program *p = fsm_capture_get_program_by_id(env->src, + 
src_prog_id); + assert(p != NULL); + + struct capvm_program *cp = capvm_program_copy(env->alloc, p); + if (cp == NULL) { + env->ok = 0; + return 0; + } + capvm_program_rebase(cp, env->capture_base_src); + + /* add program, if not present */ + if (!fsm_capture_add_program(env->dst, + cp, &dst_prog_id)) { + f_free(env->alloc, cp); + env->ok = 0; + return 0; + } - if (!fsm_capture_add_action(env->dst, state, type, - capture_id, to)) { + struct prog_mapping *m = &env->mappings[env->mapping_used]; + m->src_prog_id = src_prog_id; + m->dst_prog_id = dst_prog_id; + env->mapping_used++; + } + + /* associate with end states */ + if (!fsm_capture_associate_program_with_end_state(env->dst, + dst_prog_id, dst_state)) { env->ok = 0; return 0; } @@ -141,18 +201,55 @@ copy_capture_cb(fsm_state_t state, } static int -copy_capture_actions(struct fsm *dst, struct fsm *src) +copy_capture_programs(struct fsm *dst, const struct fsm *src, + fsm_state_t state_base_src, unsigned capture_base_src) { - struct copy_capture_env env; - env.tag = 'C'; - env.dst = dst; - env.ok = 1; + const struct fsm_alloc *alloc = src->opt->alloc; + struct prog_mapping *mappings = f_malloc(alloc, + DEF_MAPPING_CEIL * sizeof(mappings[0])); + if (mappings == NULL) { + return 0; + } - fsm_capture_action_iter(src, copy_capture_cb, &env); + struct copy_capture_programs_env env = { + .alloc = alloc, + .src = src, + .dst = dst, + .ok = 1, + .state_base_src = state_base_src, + .capture_base_src = capture_base_src, + .mapping_ceil = DEF_MAPPING_CEIL, + .mappings = mappings, + }; + fsm_capture_iter_program_ids_for_all_end_states(src, + copy_capture_programs_cb, &env); + + f_free(alloc, env.mappings); return env.ok; } +static int +copy_end_metadata(struct fsm *dst, struct fsm *src, + fsm_state_t base_src, unsigned capture_base_src) +{ + /* TODO: inline */ + + if (!copy_end_ids(dst, src, base_src)) { + return 0; + } + + if (!copy_active_capture_ids(dst, src, base_src, capture_base_src)) { + return 0; + } + + if 
(!copy_capture_programs(dst, src, base_src, capture_base_src)) { + return 0; + } + + return 1; +} + struct copy_end_ids_env { char tag; struct fsm *dst; @@ -186,12 +283,50 @@ copy_end_ids(struct fsm *dst, struct fsm *src, fsm_state_t base_src) struct copy_end_ids_env env; env.tag = 'M'; /* for Merge */ env.dst = dst; - env.src = src; env.base_src = base_src; return fsm_endid_iter_bulk(src, copy_end_ids_cb, &env); } +struct copy_active_capture_ids_env { + char tag; + struct fsm *dst; + fsm_state_t base_src; + unsigned capture_base_src; + int ok; +}; + +static int +copy_active_capture_ids_cb(fsm_state_t state, unsigned capture_id, void *opaque) +{ + struct copy_active_capture_ids_env *env = opaque; + assert(env->tag == 'A'); + + if (!fsm_capture_set_active_for_end(env->dst, + capture_id + env->capture_base_src, + state + env->base_src)) { + env->ok = 0; + return 0; + } + return 1; +} + +static int +copy_active_capture_ids(struct fsm *dst, struct fsm *src, + fsm_state_t base_src, unsigned capture_base_src) +{ + struct copy_active_capture_ids_env env; + env.tag = 'A'; + env.dst = dst; + env.base_src = base_src; + env.capture_base_src = capture_base_src; + env.ok = 1; + + fsm_capture_iter_active_for_all_end_states(src, + copy_active_capture_ids_cb, &env); + return env.ok; +} + struct fsm * fsm_mergeab(struct fsm *a, struct fsm *b, fsm_state_t *base_b) diff --git a/src/libfsm/minimise.c b/src/libfsm/minimise.c index 60cec48c5..1ca4a36ac 100644 --- a/src/libfsm/minimise.c +++ b/src/libfsm/minimise.c @@ -22,6 +22,7 @@ #endif #include +#include #include #include #include @@ -38,6 +39,45 @@ #include "minimise_internal.h" #include "minimise_test_oracle.h" +static int +label_sets_match(const uint64_t a[256/64], const uint64_t b[256/64]); + +static int +split_ecs_by_end_metadata(struct min_env *env, const struct fsm *fsm); + +#if EXPENSIVE_CHECKS +#include + +static void +check_done_ec_offset(const struct min_env *env); + +static int +all_end_states_are_currently_together(const 
struct min_env *env); +#endif + +#define DEF_CAPTURE_ID_CEIL 4 +struct end_metadata { + struct end_metadata_capture { + unsigned count; + unsigned ceil; + unsigned *ids; + } capture; + + struct end_metadata_program { + unsigned count; + unsigned ceil; + unsigned *ids; + } program; +}; + +static int +collect_capture_ids(const struct fsm *fsm, fsm_state_t s, + struct end_metadata_capture *c); + +static int +collect_capture_program_ids(const struct fsm *fsm, fsm_state_t s, + struct end_metadata_program *p); + int fsm_minimise(struct fsm *fsm) { @@ -55,19 +95,36 @@ fsm_minimise(struct fsm *fsm) assert(fsm != NULL); assert(fsm_all(fsm, fsm_isdfa)); +#if LOG_INIT > 1 + fprintf(stderr, "=== BEFORE TRIM, %d states\n", fsm_countstates(fsm)); + fsm_print_fsm(stderr, fsm); + fsm_capture_dump(stderr, "#### pre_minimise", fsm); + fprintf(stderr, "=== BEFORE TRIM\n"); +#endif + /* The algorithm used below won't remove states without a path * to an end state, because it cannot prove they're * unnecessary, so they must be trimmed away first. */ + TIME(&pre); if (fsm_trim(fsm, FSM_TRIM_START_AND_END_REACHABLE, &shortest_end_distance) < 0) { return 0; } + TIME(&post); + DIFF_MSEC("trim", pre, post, NULL); if (fsm->statecount == 0) { r = 1; goto cleanup; } +#if LOG_INIT > 1 + fprintf(stderr, "=== AFTER TRIM, %d states\n", fsm_countstates(fsm)); + fprintf(stderr, "# pre_minimise\n"); + fsm_print_fsm(stderr, fsm); + fsm_capture_dump(stderr, "#### pre_minimise", fsm); +#endif + TIME(&pre); collect_labels(fsm, labels, &label_count); TIME(&post); @@ -113,6 +170,8 @@ fsm_minimise(struct fsm *fsm) goto cleanup; } + fsm_capture_integrity_check(dst); + #if EXPENSIVE_CHECKS if (!fsm_capture_has_captures(fsm)) { struct fsm *oracle = fsm_minimise_test_oracle(fsm); @@ -253,6 +312,12 @@ build_minimised_mapping(const struct fsm *fsm, goto cleanup; } + /* This only needs to be run once, but must run before the main + * fixpoint loop below, because it potentially refines ECs. 
*/ + if (!split_ecs_by_end_metadata(&env, fsm)) { + goto cleanup; + } + #if LOG_INIT for (i = 0; i < env.ec_count; i++) { fprintf(stderr, "# --ec[%lu]: %d\n", i, env.ecs[i]); @@ -329,7 +394,7 @@ build_minimised_mapping(const struct fsm *fsm, } } -#if EXPENSIVE_INTEGRITY_CHECKS +#if EXPENSIVE_CHECKS check_done_ec_offset(&env); #endif } @@ -365,6 +430,12 @@ build_minimised_mapping(const struct fsm *fsm, } #endif +#if EXPENSIVE_CHECKS + for (i = 0; i < fsm->statecount; i++) { + assert(mapping[i] < fsm->statecount); + } +#endif + #if LOG_STEPS fprintf(stderr, "# done in %lu iteration(s), %lu step(s), %ld -> %ld states, label_count %lu\n", env.iter, env.steps, fsm->statecount, @@ -403,7 +474,7 @@ dump_ecs(FILE *f, const struct min_env *env) #endif } -#if EXPENSIVE_INTEGRITY_CHECKS +#if EXPENSIVE_CHECKS static void check_descending_EC_counts(const struct min_env *env) { @@ -611,7 +682,7 @@ populate_initial_ecs(struct min_env *env, const struct fsm *fsm, /* The dead state is not a member of any EC. 
*/ env->state_ecs[env->dead_state] = NO_ID; -#if EXPENSIVE_INTEGRITY_CHECKS +#if EXPENSIVE_CHECKS check_descending_EC_counts(env); #endif @@ -646,7 +717,394 @@ populate_initial_ecs(struct min_env *env, const struct fsm *fsm, #endif } -#if EXPENSIVE_INTEGRITY_CHECKS +SUPPRESS_EXPECTED_UNSIGNED_INTEGER_OVERFLOW() +static void +incremental_hash_of_ids(uint64_t *accum, fsm_end_id_t id) +{ + (*accum) += hash_id(id); +} + +static int +same_end_metadata(const struct end_metadata *a, const struct end_metadata *b) +{ + if (a->capture.count != b->capture.count) { + return 0; + } + + if (a->program.count != b->program.count) { + return 0; + } + + /* compare -- these must be sorted */ + + for (size_t i = 0; i < a->capture.count; i++) { + if (a->capture.ids[i] != b->capture.ids[i]) { + return 0; + } + } + for (size_t i = 0; i < a->program.count; i++) { + if (a->program.ids[i] != b->program.ids[i]) { + return 0; + } + } + + return 1; +} + +static int +split_ecs_by_end_metadata(struct min_env *env, const struct fsm *fsm) +{ + int res = 0; + + struct end_metadata *end_md; + fsm_state_t *htab = NULL; + + const size_t state_count = fsm_countstates(fsm); + +#if EXPENSIVE_CHECKS + /* Invariant: For each EC, either all or none of the states + * are end states. We only partition the set(s) of end states + * here. */ + assert(all_end_states_are_currently_together(env)); +#endif + + /* Use the hash table to assign to new groups. 
*/ + + end_md = f_calloc(fsm->opt->alloc, + state_count, sizeof(end_md[0])); + if (end_md == NULL) { + goto cleanup; + } + + size_t bucket_count = 1; + while (bucket_count < state_count) { + bucket_count *= 2; /* power of 2 ceiling */ + } + const size_t mask = bucket_count - 1; + + htab = f_malloc(fsm->opt->alloc, + bucket_count * sizeof(htab[0])); + if (htab == NULL) { + goto cleanup; + } + + /* First pass: collect end state metadata */ + for (size_t ec_i = 0; ec_i < env->ec_count; ec_i++) { + fsm_state_t s = MASK_EC_HEAD(env->ecs[ec_i]); +#if LOG_ECS + fprintf(stderr, "## EC %zu\n", ec_i); +#endif + while (s != NO_ID) { + struct end_metadata *e = &end_md[s]; + if (!fsm_isend(fsm, s)) { + break; /* this EC has non-end states, skip */ + } + + if (!collect_capture_ids(fsm, s, &e->capture)) { + goto cleanup; + } + + if (!collect_capture_program_ids(fsm, s, &e->program)) { + goto cleanup; + } + + s = env->jump[s]; + } + } + +#if LOG_ECS + fprintf(stderr, "==== BEFORE PARTITIONING BY END METADATA\n"); + dump_ecs(stderr, env); + fprintf(stderr, "====\n"); +#endif + + /* FIXME: is this actually the right behavior? */ + /* Second pass: partition ECs into groups with identical end IDs. + * for each group with different end IDs, unlink them. 
*/ + const size_t max_ec = env->ec_count; + for (size_t ec_i = 0; ec_i < max_ec; ec_i++) { + fsm_state_t s = MASK_EC_HEAD(env->ecs[ec_i]); + fsm_state_t prev = NO_ID; + + for (size_t i = 0; i < bucket_count; i++) { + htab[i] = NO_ID; /* reset hash table */ + } + + while (s != NO_ID) { + const struct end_metadata *s_md = &end_md[s]; + + uint64_t hash = 0; + const fsm_state_t next = env->jump[s]; + + for (size_t pid_i = 0; pid_i < s_md->program.count; pid_i++) { + incremental_hash_of_ids(&hash, s_md->program.ids[pid_i]); + } + + for (size_t b_i = 0; b_i < bucket_count; b_i++) { + fsm_state_t *b = &htab[(b_i + hash) & mask]; + const fsm_state_t other = *b; + const struct end_metadata *other_md = &end_md[other]; + + if (other == NO_ID) { /* empty hash bucket */ + *b = s; + if (prev == NO_ID) { + /* keep the first state, along with other states + * with matching end IDs, in this EC. no-op. */ +#if LOG_ECS + fprintf(stderr, " -- keeping state s %d in EC %u\n", + s, env->state_ecs[s]); +#endif + prev = s; + } else { /* not first (prev is set), so it landed somewhere else */ + /* unlink and assign new EC */ +#if LOG_ECS + fprintf(stderr, " -- moving state s %d from EC %u to EC %u\n", + s, env->state_ecs[s], env->ec_count); +#endif + env->jump[prev] = env->jump[s]; /* unlink */ + env->ecs[env->ec_count] = s; /* head of new EC */ + env->state_ecs[s] = env->ec_count; + env->jump[s] = NO_ID; + env->ec_count++; + } + break; + } else if (same_end_metadata(s_md, other_md)) { + if (env->state_ecs[other] == ec_i) { + /* keep in the current EC -- no-op */ +#if LOG_ECS + fprintf(stderr, " -- keeping state s %d in EC %u\n", + s, env->state_ecs[s]); +#endif + prev = s; + } else { + /* unlink and link to other state's EC */ +#if LOG_ECS + fprintf(stderr, " -- appending s %d to EC %u, after state %d, before %d\n", + s, env->state_ecs[other], other, env->jump[other]); +#endif + assert(prev != NO_ID); + env->jump[prev] = env->jump[s]; /* unlink */ + env->state_ecs[s] = 
env->state_ecs[other]; + env->jump[s] = env->jump[other]; + env->jump[other] = s; /* link after other */ + } + break; + } else { + continue; /* collision */ + } + } + + s = next; + } + + /* If this EC only has one entry and it's before the + * done_ec_offset, then set that here so that invariants + * will be restored while sweeping forward after this loop. */ + + if (env->jump[MASK_EC_HEAD(env->ecs[ec_i])] == NO_ID && ec_i < env->done_ec_offset) { + env->done_ec_offset = ec_i; /* will be readjusted later */ + } + +#if LOG_ECS + fprintf(stderr, "==== AFTER PARTITIONING BY END METADATA -- EC %zu\n", ec_i); + dump_ecs(stderr, env); + fprintf(stderr, "==== (done_ec_offset: %d)\n", env->done_ec_offset); +#endif + } + +#if LOG_ECS + fprintf(stderr, "==== AFTER PARTITIONING BY END IDs\n"); + dump_ecs(stderr, env); + fprintf(stderr, "==== (done_ec_offset: %d)\n", env->done_ec_offset); +#endif + + /* Sweep forward and swap ECs as necessary so all single-entry + * ECs are at the end -- they're done. */ + size_t ec_i = env->done_ec_offset; + + while (ec_i < env->ec_count) { + const fsm_state_t head = MASK_EC_HEAD(env->ecs[ec_i]); + if (env->jump[head] == NO_ID) { + /* offset stays where it is */ +#if LOG_ECS + fprintf(stderr, "ec_i: %zu / %u -- branch a\n", ec_i, env->ec_count); +#endif + env->ecs[ec_i] = SET_SMALL_EC_FLAG(head); + } else { + /* this EC has more than one state, but is after + * the done_ec_offset, so swap it with an EC at + * the boundary. */ + const fsm_state_t n_ec_i = env->done_ec_offset; +#if LOG_ECS + fprintf(stderr, "ec_i: %zu / %u -- branch b -- swap %ld and %d\n", + ec_i, env->ec_count, ec_i, n_ec_i); +#endif + + /* swap ec[n_ec_i] and ec[ec_i] */ + const fsm_state_t tmp = env->ecs[ec_i]; + env->ecs[ec_i] = env->ecs[n_ec_i]; + env->ecs[n_ec_i] = tmp; + /* note: this may set the SMALL_EC_FLAG. 
*/ + update_ec_links(env, ec_i); + update_ec_links(env, n_ec_i); + env->done_ec_offset++; + } + ec_i++; + } + +#if LOG_ECS + fprintf(stderr, "==== (done_ec_offset is now: %d, ec_count %u)\n", env->done_ec_offset, env->ec_count); + dump_ecs(stderr, env); +#endif + + /* check that all ECs are before/after done_ec_offset */ + for (size_t ec_i = 0; ec_i < env->ec_count; ec_i++) { + const fsm_state_t s = MASK_EC_HEAD(env->ecs[ec_i]); +#if LOG_ECS + fprintf(stderr, " -- ec_i %zu: s %d\n", ec_i, s); +#endif + if (ec_i < env->done_ec_offset) { + assert(env->jump[s] != NO_ID); + } else { + assert(env->jump[s] == NO_ID); + } + } + + res = 1; + +cleanup: + if (htab != NULL) { + f_free(fsm->opt->alloc, htab); + } + if (end_md != NULL) { + size_t i; + for (i = 0; i < state_count; i++) { + struct end_metadata *e = &end_md[i]; + if (e->capture.ids != NULL) { + f_free(fsm->opt->alloc, e->capture.ids); + } + if (e->program.ids != NULL) { + f_free(fsm->opt->alloc, e->program.ids); + } + } + f_free(fsm->opt->alloc, end_md); + } + + return res; +} + +static int +cmp_unsigned(const void *pa, const void *pb) +{ + const unsigned a = *(unsigned *)pa; + const unsigned b = *(unsigned *)pb; + return a < b ? -1 : a > b ? 1 : 0; +} + +struct collect_capture_env { + int ok; + const struct fsm_alloc *alloc; + struct end_metadata_capture *c; + struct end_metadata_program *p; +}; + +static int +collect_capture_cb(fsm_state_t state, unsigned capture_id, + void *opaque) +{ + struct collect_capture_env *env = opaque; + struct end_metadata_capture *c = env->c; + (void)state; + if (c->count == c->ceil) { + const size_t nceil = (c->count == 0) + ? 
DEF_CAPTURE_ID_CEIL + : 2*c->ceil; + unsigned *nids = f_realloc(env->alloc, c->ids, nceil * sizeof(nids[0])); + if (nids == NULL) { + env->ok = 0; + return 0; + } + c->ids = nids; + c->ceil = nceil; + } + + c->ids[c->count] = capture_id; + c->count++; + return 1; +} + +static int +collect_capture_ids(const struct fsm *fsm, fsm_state_t s, + struct end_metadata_capture *c) +{ + struct collect_capture_env env = { + .ok = 1, + .alloc = fsm->opt->alloc, + .c = c, + }; + fsm_capture_iter_active_for_end_state(fsm, s, + collect_capture_cb, &env); + + if (env.ok) { + if (c->ids == NULL) { + assert(c->count == 0); + } else { + qsort(c->ids, c->count, sizeof(c->ids[0]), cmp_unsigned); + } + } + + return env.ok; +} + +static int +collect_capture_program_ids_cb(fsm_state_t state, unsigned prog_id, + void *opaque) +{ + struct collect_capture_env *env = opaque; + struct end_metadata_program *p = env->p; + (void)state; + if (p->count == p->ceil) { + const size_t nceil = (p->count == 0) + ? DEF_CAPTURE_ID_CEIL + : 2*p->ceil; + unsigned *nids = f_realloc(env->alloc, p->ids, nceil * sizeof(nids[0])); + if (nids == NULL) { + env->ok = 0; + return 0; + } + p->ids = nids; + p->ceil = nceil; + } + + p->ids[p->count] = prog_id; + p->count++; + return 1; +} + +static int +collect_capture_program_ids(const struct fsm *fsm, fsm_state_t s, + struct end_metadata_program *p) +{ + struct collect_capture_env env = { + .ok = 1, + .alloc = fsm->opt->alloc, + .p = p, + }; + fsm_capture_iter_program_ids_for_end_state(fsm, s, + collect_capture_program_ids_cb, &env); + + if (env.ok) { + if (p->ids == NULL) { + assert(p->count == 0); + } else { + qsort(p->ids, p->count, sizeof(p->ids[0]), cmp_unsigned); + } + } + + return env.ok; +} + +#if EXPENSIVE_CHECKS static void check_done_ec_offset(const struct min_env *env) { @@ -661,13 +1119,34 @@ check_done_ec_offset(const struct min_env *env) * worth the added complexity to avoid checking ECs 0 and 1. 
*/ for (i = 0; i < env->ec_count; i++) { const fsm_state_t head = MASK_EC_HEAD(env->ecs[i]); - if (i >= done_ec_offset) { + if (i >= env->done_ec_offset) { assert(head == NO_ID || env->jump[head] == NO_ID); } else if (i >= 2) { assert(env->jump[head] != NO_ID); } } } + +static int +all_end_states_are_currently_together(const struct min_env *env) +{ + /* For each EC, either all or none of the states in it + * are end states. */ + for (size_t i = 0; i < env->ec_count; i++) { + const fsm_state_t head = MASK_EC_HEAD(env->ecs[i]); + const int ec_first_is_end = fsm_isend(env->fsm, head); + + fsm_state_t s = env->jump[head]; + while (s != NO_ID) { + if (fsm_isend(env->fsm, s) != ec_first_is_end) { + return 0; + } + s = env->jump[s]; + } + } + + return 1; +} #endif static int @@ -817,7 +1296,7 @@ try_partition(struct min_env *env, unsigned char label, const unsigned dead_state_ec = env->state_ecs[env->dead_state]; const struct fsm_state *states = env->fsm->states; -#if EXPENSIVE_INTEGRITY_CHECKS +#if EXPENSIVE_CHECKS /* Count states here, to compare against the partitioned * EC' counts later. 
*/ size_t state_count = 0, psrc_count, pdst_count; @@ -857,7 +1336,7 @@ try_partition(struct min_env *env, unsigned char label, first_ec = dead_state_ec; } #if LOG_PARTITIONS > 1 - fprintf(stderr, "# --- try_partition: label '%c' -> EC %d\n", label, first_ec); + fprintf(stderr, "# --- try_partition: label '%c' -> first_ec %d\n", label, first_ec); #endif partition_counts[0] = 1; @@ -897,7 +1376,7 @@ try_partition(struct min_env *env, unsigned char label, partition_counts[0]++; prev = cur; cur = env->jump[cur]; - } else { /* unlink, split */ + } else if (to_ec != first_ec) { /* definitely different destination EC: unlink, split */ fsm_state_t next; #if LOG_PARTITIONS > 1 fprintf(stderr, "# try_partition: unlinking -- label '%c', src %u, dst %u, first_ec %d, cur %u -> to_ec %d\n", label, ec_src, ec_dst, first_ec, cur, to_ec); @@ -912,10 +1391,21 @@ try_partition(struct min_env *env, unsigned char label, env->ecs[ec_dst] = cur; cur = next; partition_counts[1]++; + } else { + /* Restrict the ones that will be marked as checked + * to the common subset before continuing, so that any + * other labels will still be checked in a later pass. */ + for (size_t i = 0; i < 4; i++) { + checked_labels[i] &= cur_label_set[i]; + } + + partition_counts[0]++; + prev = cur; + cur = env->jump[cur]; } } -#if EXPENSIVE_INTEGRITY_CHECKS +#if EXPENSIVE_CHECKS /* Count how many states were split into each EC * and check that the sum matches the original count. 
*/ psrc_count = 0; diff --git a/src/libfsm/mode.c b/src/libfsm/mode.c index 76c60b8ad..87af0bdf9 100644 --- a/src/libfsm/mode.c +++ b/src/libfsm/mode.c @@ -28,6 +28,7 @@ fsm_findmode(const struct fsm *fsm, fsm_state_t state, unsigned int *freq) } mode; mode.freq = 1; + mode.state = (fsm_state_t)-1; edge_set_group_iter_reset(fsm->states[state].edges, EDGE_GROUP_ITER_ALL, &iter); while (edge_set_group_iter_next(&iter, &info)) { @@ -46,6 +47,9 @@ fsm_findmode(const struct fsm *fsm, fsm_state_t state, unsigned int *freq) *freq = mode.freq; } + /* It's not meaningful to call this on a state without edges. */ + assert(mode.state != (fsm_state_t)-1); + assert(mode.freq >= 1); return mode.state; } diff --git a/src/libfsm/print/Makefile b/src/libfsm/print/Makefile index c2911318b..9c6f42bbf 100644 --- a/src/libfsm/print/Makefile +++ b/src/libfsm/print/Makefile @@ -20,6 +20,9 @@ SRC += src/libfsm/print/vmasm.c .for src in ${SRC:Msrc/libfsm/print/*.c} CFLAGS.${src} += -I src # XXX: for internal.h DFLAGS.${src} += -I src # XXX: for internal.h + +CFLAGS.${src} += -std=c99 +DFLAGS.${src} += -std=c99 .endfor .for src in ${SRC:Msrc/libfsm/print/*.c} diff --git a/src/libfsm/state.c b/src/libfsm/state.c index acf2bff25..c4425077a 100644 --- a/src/libfsm/state.c +++ b/src/libfsm/state.c @@ -17,6 +17,8 @@ #include #include "internal.h" +#include "capture.h" +#include "endids.h" int fsm_addstate(struct fsm *fsm, fsm_state_t *state) @@ -33,17 +35,12 @@ fsm_addstate(struct fsm *fsm, fsm_state_t *state) const size_t factor = 2; /* a guess */ const size_t n = fsm->statealloc * factor; struct fsm_state *tmp; - size_t i; tmp = f_realloc(fsm->opt->alloc, fsm->states, n * sizeof *fsm->states); if (tmp == NULL) { return 0; } - for (i = fsm->statealloc; i < n; i++) { - tmp[i].has_capture_actions = 0; - } - fsm->statealloc = n; fsm->states = tmp; } @@ -253,6 +250,18 @@ fsm_compact_states(struct fsm *fsm, } } + if (!fsm_endid_compact(fsm, mapping, orig_statecount)) { + goto error; + } + + if 
(!fsm_capture_id_compact(fsm, mapping, orig_statecount)) { + goto error; + } + + if (!fsm_capture_program_association_compact(fsm, mapping, orig_statecount)) { + goto error; + } + assert(dst == kept); assert(kept == fsm->statecount); @@ -278,4 +287,9 @@ fsm_compact_states(struct fsm *fsm, *removed = removed_count; } return 1; + +error: + f_free(fsm->opt->alloc, mapping); + + return 0; } diff --git a/src/libfsm/trim.c b/src/libfsm/trim.c index 6a9a25f09..36bf9145d 100644 --- a/src/libfsm/trim.c +++ b/src/libfsm/trim.c @@ -10,13 +10,16 @@ #include #include +#include #include #include +#include #include #include #include "internal.h" +#include "capture.h" #define DEF_EDGES_CEIL 8 #define DEF_ENDS_CEIL 8 @@ -42,17 +45,18 @@ save_edge(const struct fsm_alloc *alloc, size_t *count, size_t *ceil, struct edge **edges, fsm_state_t from, fsm_state_t to); -static int -cmp_edges_by_to(const void *pa, const void *pb) +static fsm_state_t +get_max_to(const struct edge *edges, size_t edge_count) { - const struct edge *a = (const struct edge *)pa; - const struct edge *b = (const struct edge *)pb; - - return a->to < b->to ? -1 - : a->to > b->to ? 1 - : a->from < b->from ? -1 - : a->from > b->from ? 1 - : 0; + size_t i; + fsm_state_t res = edges[0].to; + for (i = 1; i < edge_count; i++) { + const fsm_state_t to = edges[i].to; + if (to > res) { + res = to; + } + } + return res; } static int @@ -61,7 +65,7 @@ mark_states(struct fsm *fsm, enum fsm_trim_mode mode, { /* Use a queue to walk breath-first over all states reachable * from the start state. Note all end states. Collect all the - * edges, then sort them by the note they lead to, to convert it + * edges, then sort them by the node they lead to, to convert it * to a reverse edge index. 
Then, enqueue all the end states, * and again use the queue to walk the graph breadth-first, but * this time iterating bottom-up from the end states, and mark @@ -81,10 +85,13 @@ mark_states(struct fsm *fsm, enum fsm_trim_mode mode, fsm_state_t max_end; const size_t state_count = fsm->statecount; + fsm_state_t max_to; + unsigned *pv = NULL; size_t *offsets = NULL; + INIT_TIMERS(); - if (!fsm_getstart(fsm, &start)) { + if (!fsm_getstart(fsm, &start) || state_count == 0) { return 1; /* nothing is reachable */ } @@ -225,7 +232,33 @@ mark_states(struct fsm *fsm, enum fsm_trim_mode mode, } /* Sort edges by state they lead to, inverting the index. */ - qsort(edges, edge_count, sizeof(edges[0]), cmp_edges_by_to); + max_to = edge_count == 0 ? 0 : get_max_to(edges, edge_count); +#if LOG_TRIM + fprintf(stderr, " -- edge count %zu, got max_to %u\n", edge_count, max_to); +#endif + TIME(&pre); + pv = permutation_vector_with_size_and_offset(fsm->opt->alloc, + edge_count, max_to, edges, sizeof(edges[0]), offsetof(struct edge, to)); + TIME(&post); + DIFF_MSEC("trim_pv_so", pre, post, NULL); + + if (EXPENSIVE_CHECKS) { + size_t i; + int ok = 1; +#if LOG_TRIM + fprintf(stderr, "\n#i\tedge\tpv\tsorted, max_to %u\n", max_to); +#endif + for (i = 0; i < edge_count; i++) { +#if LOG_TRIM + fprintf(stderr, "%zu\t%u\t%u\t%u\n", + i, edges[i].to, pv[i], edges[pv[i]].to); +#endif + if (i > 0 && edges[pv[i]].to < edges[pv[i - 1]].to) { + ok = 0; + } + } + assert(ok); + } max_end = 0; @@ -282,7 +315,6 @@ mark_states(struct fsm *fsm, enum fsm_trim_mode mode, * offsets[i - 1], to represent zero entries. 
*/ { size_t i; - const fsm_state_t max_to = edges[edge_count - 1].to; const size_t offset_count = fsm_countstates(fsm); offsets = f_calloc(fsm->opt->alloc, @@ -292,7 +324,7 @@ mark_states(struct fsm *fsm, enum fsm_trim_mode mode, } for (i = 0; i < edge_count; i++) { - const fsm_state_t to = edges[i].to; + const fsm_state_t to = edges[pv[i]].to; offsets[to] = i + 1; } @@ -312,8 +344,8 @@ mark_states(struct fsm *fsm, enum fsm_trim_mode mode, if (LOG_TRIM > 1) { size_t i; for (i = 0; i < edge_count; i++) { - fprintf(stderr, "mark_states: edges[%zu]: %d -> %d\n", - i, edges[i].from, edges[i].to); + fprintf(stderr, "mark_states: edges[pv[%zu]]: %d -> %d\n", + i, edges[pv[i]].from, edges[pv[i]].to); } } @@ -331,13 +363,13 @@ mark_states(struct fsm *fsm, enum fsm_trim_mode mode, } for (e_i = base; e_i < limit; e_i++) { - const fsm_state_t from = edges[e_i].from; + const fsm_state_t from = edges[pv[e_i]].from; const unsigned end_distance = (sed == NULL ? 0 : sed[s_id]); assert(from < state_count); if (LOG_TRIM > 0) { - fprintf(stderr, "mark_states: edges[%zu]: from: %d, visited? %d\n", + fprintf(stderr, "mark_states: edges[pv[%zu]]: from: %d, visited? 
%d\n", e_i, from, fsm->states[from].visited); } @@ -370,6 +402,7 @@ mark_states(struct fsm *fsm, enum fsm_trim_mode mode, if (ends != NULL) { f_free(fsm->opt->alloc, ends); } if (offsets != NULL) { f_free(fsm->opt->alloc, offsets); } if (q != NULL) { queue_free(q); } + if (pv != NULL) { f_free(fsm->opt->alloc, pv); } return res; } @@ -457,7 +490,7 @@ integrity_check(const char *descr, const struct fsm *fsm) struct edge_iter edge_iter; struct fsm_edge e; -#ifdef NDEBUG +#if defined(NDEBUG) || !EXPENSIVE_CHECKS return; #endif @@ -484,8 +517,14 @@ integrity_check(const char *descr, const struct fsm *fsm) } } + fsm_capture_integrity_check(fsm); + if (LOG_TRIM > 1) { fprintf(stderr, "integrity check: %s...PASS\n", descr); + if (LOG_TRIM > 2) { + fsm_print_fsm(stderr, fsm); + fsm_capture_dump(stderr, "post_trim", fsm); + } } } @@ -504,6 +543,8 @@ fsm_trim(struct fsm *fsm, enum fsm_trim_mode mode, return 1; } + integrity_check("pre", fsm); + if (shortest_end_distance != NULL && mode == FSM_TRIM_START_AND_END_REACHABLE) { size_t s_i; diff --git a/src/libfsm/union.c b/src/libfsm/union.c index 60e1fbaff..736292a8b 100644 --- a/src/libfsm/union.c +++ b/src/libfsm/union.c @@ -32,6 +32,7 @@ fsm_union(struct fsm *a, struct fsm *b, if (combine_info == NULL) { combine_info = &combine_info_internal; } + memset(combine_info, 0x00, sizeof(*combine_info)); memset(combine_info, 0x00, sizeof(*combine_info)); @@ -105,6 +106,7 @@ fsm_union_array(size_t fsm_count, for (i = 1; i < fsm_count; i++) { struct fsm_combine_info ci; + struct fsm *combined = fsm_union(res, fsms[i], &ci); fsms[i] = NULL; if (combined == NULL) { diff --git a/src/libfsm/vm/v1.c b/src/libfsm/vm/v1.c index a326b88d8..de1f6ea93 100644 --- a/src/libfsm/vm/v1.c +++ b/src/libfsm/vm/v1.c @@ -217,7 +217,9 @@ encode_opasm_v1(const struct dfavm_vm_op *instr, size_t ninstr, size_t total_byt return ret; error: - /* XXX - cleanup */ + if (ret != NULL) { + free(ret); + } return NULL; } diff --git a/src/libfsm/vm/v2.c 
b/src/libfsm/vm/v2.c index c85edff98..07eb12ef4 100644 --- a/src/libfsm/vm/v2.c +++ b/src/libfsm/vm/v2.c @@ -155,7 +155,10 @@ encode_opasm_v2(const struct dfavm_vm_op *instr, size_t ninstr) return ret; error: - /* XXX - cleanup */ + if (ret != NULL) { + free(ret); + } + return NULL; } diff --git a/src/libre/Makefile b/src/libre/Makefile index a88a92418..508b77a76 100644 --- a/src/libre/Makefile +++ b/src/libre/Makefile @@ -10,13 +10,17 @@ SRC += src/libre/ast_new_from_fsm.c SRC += src/libre/ast_rewrite.c SRC += src/libre/ac.c SRC += src/libre/re_strings.c +SRC += src/libre/re_capvm_compile.c # generated SRC += src/libre/class_name.c -.for src in ${SRC:Msrc/libre/ast_compile.c} +.for src in ${SRC:Msrc/libre/ast*.c} ${SRC:Msrc/libre/re*.c} CFLAGS.${src} += -I src # XXX: for internal.h DFLAGS.${src} += -I src # XXX: for internal.h + +CFLAGS.${src} += -std=c99 +DFLAGS.${src} += -std=c99 .endfor LIB += libre diff --git a/src/libre/ast.c b/src/libre/ast.c index 2e1d21817..6bd6063f5 100644 --- a/src/libre/ast.c +++ b/src/libre/ast.c @@ -494,6 +494,7 @@ ast_expr_cmp(const struct ast_expr *a, const struct ast_expr *b) case AST_EXPR_GROUP: if (a->u.group.id < b->u.group.id) { return -1; } if (a->u.group.id > b->u.group.id) { return +1; } + /* .repeated flag is ignored here */ return ast_expr_cmp(a->u.group.e, b->u.group.e); @@ -753,6 +754,7 @@ ast_make_expr_group(struct ast_expr_pool **poolp, enum re_flags re_flags, struct res->re_flags = re_flags; res->u.group.e = e; res->u.group.id = id; + res->u.group.repeated = 0; /* may be set during analysis */ return res; } @@ -770,6 +772,7 @@ ast_make_expr_anchor(struct ast_expr_pool **poolp, enum re_flags re_flags, enum res->type = AST_EXPR_ANCHOR; res->re_flags = re_flags; res->u.anchor.type = type; + res->u.anchor.is_end_nl = 0; /* may be set later */ return res; } diff --git a/src/libre/ast.h b/src/libre/ast.h index 233744847..3ef0c1f5f 100644 --- a/src/libre/ast.h +++ b/src/libre/ast.h @@ -7,6 +7,11 @@ #ifndef RE_AST_H #define 
RE_AST_H +#include +#include +#include +#include + /* * This is a duplicate of struct lx_pos, but since we're linking to * code with several distinct lexers, there isn't a clear lexer.h @@ -62,7 +67,9 @@ enum ast_anchor_type { * followed by nullable nodes. * * - AST_FLAG_UNSATISFIABLE - * The node caused the regex to become unsatisfiable. + * The node is unsatisfiable (can never match anything). + * This can cause AST subtrees to be pruned, or for the + * entire regex to become unsatisfiable. * * - AST_FLAG_NULLABLE * The node is not always evaluated, such as nodes that @@ -159,6 +166,8 @@ struct ast_expr { size_t count; /* used */ size_t alloc; /* allocated */ struct ast_expr **n; + int contains_empty_groups; + int nullable_alt_inside_plus_repeat; } alt; struct { @@ -172,12 +181,14 @@ struct ast_expr { struct ast_expr_repeat { struct ast_expr *e; unsigned min; - unsigned max; + unsigned max; /* can be AST_COUNT_UNBOUNDED */ + int contains_empty_groups; } repeat; struct { struct ast_expr *e; unsigned id; + int repeated; /* set during analysis */ } group; struct { @@ -235,9 +246,12 @@ ast_pool_free(struct ast_expr_pool *pool); struct ast_expr_pool * ast_expr_pool_save(void); +#define AST_NO_MAX_CAPTURE_ID ((long)-1) + struct ast { struct ast_expr_pool *pool; struct ast_expr *expr; + long max_capture_id; int has_unanchored_start; int has_unanchored_end; }; diff --git a/src/libre/ast_analysis.c b/src/libre/ast_analysis.c index 3298f62c2..df9e8ce54 100644 --- a/src/libre/ast_analysis.c +++ b/src/libre/ast_analysis.c @@ -21,7 +21,7 @@ #define LOG_ANALYSIS 0 #define LOG_FIRST_ANALYSIS (0 + LOG_ANALYSIS) -#define LOG_REPEATED_GROUPS (0 + LOG_ANALYSIS) +#define LOG_REPETITION_CASES (0 + LOG_ANALYSIS) #define LOG_FORKING (0 + LOG_ANALYSIS) #define LOG_ANCHORING (0 + LOG_ANALYSIS) #define LOG_CONCAT_FLAGS (0 + LOG_ANALYSIS) @@ -37,6 +37,11 @@ /* Mask for end-anchor flags */ #define END_ANCHOR_FLAG_MASK (AST_FLAG_ANCHORED_END | AST_FLAG_END_NL) +struct capture_env { + long 
max_capture_id; + int use_captures; +}; + static int is_nullable(const struct ast_expr *n) { @@ -239,7 +244,7 @@ analysis_iter(struct ast_expr *n) } case AST_EXPR_ANCHOR: - /* anchor flags will be handled on the second pass */ + /* anchor flags will be handled on the next pass */ break; case AST_EXPR_SUBTRACT: @@ -267,23 +272,280 @@ analysis_iter(struct ast_expr *n) return AST_ANALYSIS_OK; } -/* Analysis for unanchored starts/ends uses three values, because some - * details decide the whole subtree is/isn't, others should defer to - * analysis elsewhere it the tree, but an overall result of undecided - * still decides yes. */ -enum unanchored_analysis_res { - UA_NO = 'N', - UA_YES = 'Y', - UA_UNDECIDED = 'U', +static int +is_only_anchors(struct ast_expr *expr) +{ + if (can_consume_input(expr)) { return 0; } + + switch (expr->type) { + case AST_EXPR_ANCHOR: + return 1; + + case AST_EXPR_CONCAT: + if (expr->u.concat.count == 0) { return 0; } + for (size_t i = 0; i < expr->u.concat.count; i++) { + if (!is_only_anchors(expr->u.concat.n[i]) + && can_consume_input(expr->u.concat.n[i])) { + return 0; + } + } + return 1; + + case AST_EXPR_ALT: + assert(expr->u.alt.count > 0); + for (size_t i = 0; i < expr->u.alt.count; i++) { + /* earlier matches will shadow later ones */ + if (is_only_anchors(expr->u.alt.n[i])) { + return 1; + } + } + return 0; + + case AST_EXPR_REPEAT: + if (expr->u.repeat.min == 0 && expr->u.repeat.max == 0) { + return 0; + } + return is_only_anchors(expr->u.repeat.e); + + case AST_EXPR_GROUP: + return is_only_anchors(expr->u.group.e); + + default: + break; + } + + return 0; +} + +static enum ast_analysis_res +analysis_iter_repetition(struct ast_expr *n, struct ast_expr *outermost_repeat_parent, + int shadowed_by_previous_alt_case, struct ast_expr *repeat_plus_ancestor) +{ + enum ast_analysis_res res = AST_ANALYSIS_OK; + + LOG(3 - LOG_REPETITION_CASES, "%s: node %p, type %s, shadowed_by_previous_alt_case %d\n", + __func__, (void *)n, 
ast_node_type_name(n->type), shadowed_by_previous_alt_case); + + if (shadowed_by_previous_alt_case) { + assert(outermost_repeat_parent == NULL + || outermost_repeat_parent->type == AST_EXPR_ALT); + } + + switch (n->type) { + case AST_EXPR_EMPTY: + case AST_EXPR_TOMBSTONE: + case AST_EXPR_ANCHOR: + case AST_EXPR_LITERAL: + case AST_EXPR_CODEPOINT: + case AST_EXPR_RANGE: + break; + + case AST_EXPR_CONCAT: { + /* If this CONCAT array of nodes always consumes input, then + * it cannot be repeated empty, so it cannot produce the + * special case that needs outermost_repeat_parent for + * AST_EXPR_REPEAT's case below. + * + * An example input that needs this is 'x(()x)*' for "xx", + * because the 'x' prevents the outermost group + * from repeating and matching empty again after consuming + * a run of "x"s. */ + if (always_consumes_input(n)) { + outermost_repeat_parent = NULL; + } + + for (size_t i = 0; i < n->u.concat.count; i++) { + res = analysis_iter_repetition(n->u.concat.n[i], outermost_repeat_parent, + shadowed_by_previous_alt_case, repeat_plus_ancestor); + if (res != AST_ANALYSIS_OK) { return res; } + } + break; + } + + case AST_EXPR_ALT: { + /* See AST_EXPR_GROUP below for why this matters. */ + int new_shadowed_by_previous_alt_case = shadowed_by_previous_alt_case; + + /* FIXME: check nesting of this construct */ + + LOG(3 - LOG_REPETITION_CASES, + "%s: ALT node %p, repeat_plus_ancestor %p\n", + __func__, (void *)n, (void *)repeat_plus_ancestor); + + for (size_t i = 0; i < n->u.alt.count; i++) { + /* If this is an ALT inside of a repeated subtree that contains + * a capture, this will need special handling. 
*/ + if (outermost_repeat_parent != NULL) { + LOG(3 - LOG_REPETITION_CASES, + "%s: setting outermost_repeat_parent to %p for alt branch %zu, repeat_plus_ancestor %p\n", + __func__, (void *)n, i, (void *)repeat_plus_ancestor); + outermost_repeat_parent = n; + } + + if (is_nullable(n->u.alt.n[i]) || is_only_anchors(n->u.alt.n[i])) { + LOG(3 - LOG_REPETITION_CASES, + "%s: setting new_shadowed_by_previous_alt_case for alt branch %zu, repeat_plus_ancestor %p\n", + __func__, i, (void *)repeat_plus_ancestor); + new_shadowed_by_previous_alt_case = 1; + if (repeat_plus_ancestor != NULL) { + n->u.alt.nullable_alt_inside_plus_repeat = 1; + assert(repeat_plus_ancestor->type == AST_EXPR_REPEAT); + assert(repeat_plus_ancestor->u.repeat.min == 1); + assert(repeat_plus_ancestor->u.repeat.max == AST_COUNT_UNBOUNDED); + + /* Repetition of an alt subtree which has a capture group child that + * only contains only* anchors is not handled properly yet. This + * isn't actually _useful_, it's just something that comes up + * in fuzzing, so reject it as an unsupported PCRE construct. + * + * An example input that triggers this is '^(($)|)+$' . */ + set_flags(n, AST_FLAG_UNSATISFIABLE); + return AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE; + } + } + + res = analysis_iter_repetition(n->u.alt.n[i], + outermost_repeat_parent, + new_shadowed_by_previous_alt_case, + repeat_plus_ancestor); + if (res != AST_ANALYSIS_OK) { return res; } + } + break; + } + + case AST_EXPR_REPEAT: + { + struct ast_expr *child = n->u.repeat.e; + + LOG(3 - LOG_REPETITION_CASES, "%s: REPEAT node %p, min %u max %u nullable? 
%d, !cci %d\n", + __func__, (void *)n, n->u.repeat.min, n->u.repeat.max, + is_nullable(child), can_consume_input(child)); + + if (n->u.repeat.min == 1 && + n->u.repeat.max == AST_COUNT_UNBOUNDED) { + LOG(3 - LOG_REPETITION_CASES, "%s: setting repeat_plus_ancestor to %p\n", + __func__, (void *)n); + repeat_plus_ancestor = n; + } else { + repeat_plus_ancestor = NULL; + } + + /* Special cases for a repeated group that contains possibly empty captures, + * in order to correctly reflect their repeating one more time and capture + * at the end (but without an infinite loop). + * + * For example, '^((x?))*$' will always end up with capture groups 1 and 2 + * at the end of the input for any number of "x"s, since the outermost ()* + * can always repeat once more time, consuming nothing, and clobber the + * existing captures. We mark repeated groups so that the compiled capture + * program can move saving the captures after the repetition, instead + * behaving like `^((?:x?)*(())$`. + * + * However, if the repeated subtree always consumes input, such as with + * '^(()a)+b$', then clear any passed in outermost_repeat_parent, because + * having to consume input will prevent that extra repetition of the + * empty captures. 
*/ + if (always_consumes_input(n)) { + res = analysis_iter_repetition(child, NULL, shadowed_by_previous_alt_case, + repeat_plus_ancestor); + } else if (outermost_repeat_parent == NULL && n->u.repeat.max > 1) { + LOG(3 - LOG_REPETITION_CASES, "%s: recursing with outermost_repeat_parent set to %p\n", + __func__, (void *)n); + res = analysis_iter_repetition(child, n, 0, + repeat_plus_ancestor); + } else { + LOG(3 - LOG_REPETITION_CASES, "%s: recursing with outermost_repeat_parent %p\n", + __func__, (void *)outermost_repeat_parent); + res = analysis_iter_repetition(child, outermost_repeat_parent, shadowed_by_previous_alt_case, + repeat_plus_ancestor); + } + if (res != AST_ANALYSIS_OK) { return res; } + break; + } + + case AST_EXPR_GROUP: + LOG(3 - LOG_REPETITION_CASES, + "%s: GROUP %p, repeat_plus_ancestor %p\n", + __func__, (void *)n, (void *)repeat_plus_ancestor); + + + if (outermost_repeat_parent != NULL && (is_nullable(n) || !can_consume_input(n))) { + int should_mark_repeated = 1; + /* If the outermost_repeat_parent is an ALT node and a previous ALT subtree + * matching the empty string is shadowing this group, then do not mark it + * as repeated, because that can lead to incorrect handling in somewhat + * contrived regexes like '^(?:|(|x))*$'. 
*/ + if (outermost_repeat_parent->type == AST_EXPR_ALT && shadowed_by_previous_alt_case) { + LOG(3 - LOG_REPETITION_CASES, + "%s: hit group shadowed_by_previous_alt_case, skipping\n", __func__); + should_mark_repeated = 0; + } + + if (n->flags & (AST_FLAG_ANCHORED_START | AST_FLAG_ANCHORED_END)) { + LOG(3 - LOG_REPETITION_CASES, + "%s: hit repeating anchor, skipping\n", __func__); + should_mark_repeated = 0; + } + + if (should_mark_repeated) { + LOG(3 - LOG_REPETITION_CASES, "%s: setting group %u to repeated\n", + __func__, n->u.group.id); + n->u.group.repeated = 1; + assert(outermost_repeat_parent->type == AST_EXPR_REPEAT || + outermost_repeat_parent->type == AST_EXPR_ALT); + LOG(3 - LOG_REPETITION_CASES, "%s: setting contains_empty_groups on outermost_repeat_parent %p\n", + __func__, (void *)outermost_repeat_parent); + if (outermost_repeat_parent->type == AST_EXPR_REPEAT) { + outermost_repeat_parent->u.repeat.contains_empty_groups = 1; + } else if (outermost_repeat_parent->type == AST_EXPR_ALT) { + outermost_repeat_parent->u.alt.contains_empty_groups = 1; + } else { + assert(!"type mismatch"); + } + } + } + + if (repeat_plus_ancestor != NULL && (is_nullable(n) || !can_consume_input(n))) { + assert(repeat_plus_ancestor->type == AST_EXPR_REPEAT + && repeat_plus_ancestor->u.repeat.min == 1 + && repeat_plus_ancestor->u.repeat.max == AST_COUNT_UNBOUNDED); + LOG(3 - LOG_REPETITION_CASES, + "%s: not yet implemented, skipping\n", __func__); + /* return AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE; */ + } + + res = analysis_iter_repetition(n->u.group.e, outermost_repeat_parent, + shadowed_by_previous_alt_case, repeat_plus_ancestor); + if (res != AST_ANALYSIS_OK) { return res; } + break; + + case AST_EXPR_SUBTRACT: + res = analysis_iter_repetition(n->u.subtract.a, outermost_repeat_parent, shadowed_by_previous_alt_case, + repeat_plus_ancestor); + if (res != AST_ANALYSIS_OK) { return res; } + res = analysis_iter_repetition(n->u.subtract.b, outermost_repeat_parent, 
shadowed_by_previous_alt_case, + repeat_plus_ancestor); + break; + + default: + assert(!"unreached"); + } + return res; +} + +enum anchoring_analysis_res { + ANCHOR_ANALYSIS_ANCHORED = 'a', + ANCHOR_ANALYSIS_UNANCHORED = 'u', + ANCHOR_ANALYSIS_UNDECIDED = '_', }; -static enum unanchored_analysis_res +static enum anchoring_analysis_res analysis_iter_unanchored_start(const struct ast_expr *n) { if (n->flags & AST_FLAG_ANCHORED_START) { LOG(4 - LOG_UNANCHORED_FLAGS, "%s: n (%p)->flags & AST_FLAG_ANCHORED_START -> N\n", __func__, (void *)n); - return UA_NO; + return ANCHOR_ANALYSIS_ANCHORED; } LOG(4 - LOG_UNANCHORED_FLAGS, "%s: node %p, type %s\n", @@ -293,7 +555,7 @@ analysis_iter_unanchored_start(const struct ast_expr *n) case AST_EXPR_EMPTY: LOG(3 - LOG_UNANCHORED_FLAGS, "%s: %s -> U\n", __func__, ast_node_type_name(n->type)); - return UA_UNDECIDED; + return ANCHOR_ANALYSIS_UNDECIDED; case AST_EXPR_TOMBSTONE: break; @@ -302,12 +564,12 @@ analysis_iter_unanchored_start(const struct ast_expr *n) switch (n->u.anchor.type) { case AST_ANCHOR_START: LOG(3 - LOG_UNANCHORED_FLAGS, "%s: ^ anchor -> N\n", __func__); - return UA_NO; + return ANCHOR_ANALYSIS_ANCHORED; case AST_ANCHOR_END: LOG(3 - LOG_UNANCHORED_FLAGS, "%s: $ anchor -> U\n", __func__); /* will be handled by other cases */ - return UA_UNDECIDED; + return ANCHOR_ANALYSIS_UNDECIDED; default: assert(!"unreached"); @@ -323,7 +585,7 @@ analysis_iter_unanchored_start(const struct ast_expr *n) case AST_EXPR_RANGE: LOG(3 - LOG_UNANCHORED_FLAGS, "%s: %s -> Y\n", __func__, ast_node_type_name(n->type)); - return UA_YES; + return ANCHOR_ANALYSIS_UNANCHORED; case AST_EXPR_CONCAT: { size_t i; @@ -331,8 +593,8 @@ analysis_iter_unanchored_start(const struct ast_expr *n) /* has unanchored start if first non-nullable child does */ for (i = 0; i < n->u.concat.count; i++) { const struct ast_expr *child = n->u.concat.n[i]; - const enum unanchored_analysis_res child_res = analysis_iter_unanchored_start(child); - if (child_res != 
UA_UNDECIDED) { + const enum anchoring_analysis_res child_res = analysis_iter_unanchored_start(child); + if (child_res != ANCHOR_ANALYSIS_UNDECIDED) { LOG(3 - LOG_UNANCHORED_FLAGS, "%s: %s -> %c (child res)\n", __func__, ast_node_type_name(n->type), child_res); return child_res; @@ -350,19 +612,19 @@ analysis_iter_unanchored_start(const struct ast_expr *n) size_t i; /* if all children agree, return that result, otherwise undecided */ - const enum unanchored_analysis_res first_child_res = analysis_iter_unanchored_start(n->u.alt.n[0]); + const enum anchoring_analysis_res first_child_res = analysis_iter_unanchored_start(n->u.alt.n[0]); LOG(3 - LOG_UNANCHORED_FLAGS, "%s: ALT child 0 -- %s -> %c (child res)\n", __func__, ast_node_type_name(n->type), first_child_res); for (i = 1; i < n->u.alt.count; i++) { const struct ast_expr *child = n->u.alt.n[i]; - const enum unanchored_analysis_res child_res = analysis_iter_unanchored_start(child); + const enum anchoring_analysis_res child_res = analysis_iter_unanchored_start(child); LOG(3 - LOG_UNANCHORED_FLAGS, "%s: ALT child %zd -- %s -> %c (child res)\n", __func__, i, ast_node_type_name(n->type), child_res); if (child_res != first_child_res) { LOG(3 - LOG_UNANCHORED_FLAGS, "%s: %s -> %c (child result) contracts first, returning U\n", __func__, ast_node_type_name(n->type), child_res); - return UA_UNDECIDED; + return ANCHOR_ANALYSIS_UNDECIDED; } } @@ -375,7 +637,7 @@ analysis_iter_unanchored_start(const struct ast_expr *n) if (n->u.repeat.min == 0) { LOG(3 - LOG_UNANCHORED_FLAGS, "%s: %s -> U (repeat.min == 0)\n", __func__, ast_node_type_name(n->type)); - return UA_UNDECIDED; + return ANCHOR_ANALYSIS_UNDECIDED; } return analysis_iter_unanchored_start(n->u.repeat.e); @@ -389,16 +651,16 @@ analysis_iter_unanchored_start(const struct ast_expr *n) assert(!"unreached"); } - return UA_UNDECIDED; + return ANCHOR_ANALYSIS_UNDECIDED; } -static enum unanchored_analysis_res +static enum anchoring_analysis_res 
analysis_iter_unanchored_end(const struct ast_expr *n) { if (n->flags & AST_FLAG_ANCHORED_END) { LOG(4 - LOG_UNANCHORED_FLAGS, "%s: node (%p)->flags & AST_FLAG_ANCHORED_END -> N\n", __func__, (void *)n); - return UA_NO; + return ANCHOR_ANALYSIS_ANCHORED; } LOG(4 - LOG_UNANCHORED_FLAGS, "%s: node %p, type %s\n", @@ -408,7 +670,7 @@ analysis_iter_unanchored_end(const struct ast_expr *n) case AST_EXPR_EMPTY: LOG(3 - LOG_UNANCHORED_FLAGS, "%s: %s -> U\n", __func__, ast_node_type_name(n->type)); - return UA_UNDECIDED; + return ANCHOR_ANALYSIS_UNDECIDED; case AST_EXPR_TOMBSTONE: break; @@ -418,12 +680,12 @@ analysis_iter_unanchored_end(const struct ast_expr *n) case AST_ANCHOR_START: LOG(3 - LOG_UNANCHORED_FLAGS, "%s: ^ %s -> U\n", __func__, ast_node_type_name(n->type)); - return UA_UNDECIDED; + return ANCHOR_ANALYSIS_UNDECIDED; case AST_ANCHOR_END: LOG(3 - LOG_UNANCHORED_FLAGS, "%s: $ %s -> N\n", __func__, ast_node_type_name(n->type)); - return UA_NO; + return ANCHOR_ANALYSIS_ANCHORED; default: assert(!"unreached"); @@ -439,7 +701,7 @@ analysis_iter_unanchored_end(const struct ast_expr *n) case AST_EXPR_RANGE: LOG(3 - LOG_UNANCHORED_FLAGS, "%s: %s -> Y\n", __func__, ast_node_type_name(n->type)); - return UA_YES; + return ANCHOR_ANALYSIS_UNANCHORED; case AST_EXPR_CONCAT: { size_t i; @@ -447,8 +709,8 @@ analysis_iter_unanchored_end(const struct ast_expr *n) /* has unanchored end if last non-nullable child does */ for (i = n->u.concat.count; i > 0; i--) { const struct ast_expr *child = n->u.concat.n[i - 1]; - const enum unanchored_analysis_res child_res = analysis_iter_unanchored_end(child); - if (child_res != UA_UNDECIDED) { + const enum anchoring_analysis_res child_res = analysis_iter_unanchored_end(child); + if (child_res != ANCHOR_ANALYSIS_UNDECIDED) { LOG(3 - LOG_UNANCHORED_FLAGS, "%s: %s -> %c (child res)\n", __func__, ast_node_type_name(n->type), child_res); return child_res; @@ -466,19 +728,19 @@ analysis_iter_unanchored_end(const struct ast_expr *n) size_t i; /* 
if all children agree, return that result, otherwise undecided */ - const enum unanchored_analysis_res first_child_res = analysis_iter_unanchored_end(n->u.alt.n[0]); + const enum anchoring_analysis_res first_child_res = analysis_iter_unanchored_end(n->u.alt.n[0]); LOG(3 - LOG_UNANCHORED_FLAGS, "%s: ALT child 0 -- %s -> %c (child res)\n", __func__, ast_node_type_name(n->type), first_child_res); for (i = 1; i < n->u.alt.count; i++) { const struct ast_expr *child = n->u.alt.n[i]; - const enum unanchored_analysis_res child_res = analysis_iter_unanchored_end(child); + const enum anchoring_analysis_res child_res = analysis_iter_unanchored_end(child); LOG(3 - LOG_UNANCHORED_FLAGS, "%s: ALT child %zd -- %s -> %c (child res)\n", __func__, i, ast_node_type_name(n->type), child_res); if (child_res != first_child_res) { LOG(3 - LOG_UNANCHORED_FLAGS, "%s: %s -> %c (child result) contracts first, returning U\n", __func__, ast_node_type_name(n->type), child_res); - return UA_UNDECIDED; + return ANCHOR_ANALYSIS_UNDECIDED; } } @@ -491,7 +753,7 @@ analysis_iter_unanchored_end(const struct ast_expr *n) if (n->u.repeat.min == 0) { LOG(3 - LOG_UNANCHORED_FLAGS, "%s: %s -> U (repeat.min == 0)\n", __func__, ast_node_type_name(n->type)); - return UA_UNDECIDED; + return ANCHOR_ANALYSIS_UNDECIDED; } return analysis_iter_unanchored_end(n->u.repeat.e); @@ -505,7 +767,7 @@ analysis_iter_unanchored_end(const struct ast_expr *n) assert(!"unreached"); } - return UA_UNDECIDED; + return ANCHOR_ANALYSIS_UNDECIDED; } static void @@ -563,9 +825,68 @@ struct anchoring_env { /* Corresponding flag for end anchors while sweeping backward. */ int followed_by_consuming; + /* Special case for detecting '$[^a]', which matches "\n" with + * a capture group 0 of (0,1) in PCRE. */ + int followed_by_consuming_newline; + + /* Flag for tracking whether we're in a part of the subtree that + * is always before a start anchor. This influences satisfiability + * and edge cases like '()*^'. 
*/ int before_start_anchor; + + /* Flag used to detect and reject the awkward case in '$[^a]', + * where (according to PCRE) the character class after the '$' + * should match the literal newline, but nothing else, and only + * once. Because $ is actually a zero-width assertion that + * execution is either at the end of input or a trailing + * newline, it has the rather surprising result that '$[^a]' + * will not match "x" but *will* match "x\n" (because it has a $ + * before a trailing newline, and because the newline matches + * the non-skippable [^a]). We just return an unsupported + * error for this case. */ + enum newline_after_end_anchor_state { + NAEAS_NONE, + NAEAS_WOULD_MATCH_ONCE, + } newline_after_end_anchor_state; + + int after_end_anchor; }; +/* Does the subtree match a literal '\n'? */ +static int +matches_newline(const struct ast_expr *n) +{ + switch (n->type) { + case AST_EXPR_LITERAL: + return n->u.literal.c == '\n'; + + case AST_EXPR_SUBTRACT: + return matches_newline(n->u.subtract.a) + && !matches_newline(n->u.subtract.b); + + case AST_EXPR_RANGE: + { + const struct ast_endpoint *f = &n->u.range.from; + const struct ast_endpoint *t = &n->u.range.to; + if (f->type != AST_ENDPOINT_LITERAL + || t->type != AST_ENDPOINT_LITERAL) { + /* not implemented */ + LOG(1 - LOG_ANCHORING, "%s: not implemented\n", __func__); + return 0; + } + + const int res = f->u.literal.c <= '\n' && t->u.literal.c >= '\n'; + LOG(1 - LOG_ANCHORING, "%s: RANGE res %d\n", __func__, res); + return res; + } + + default: + break; + } + + return 0; +} + /* Tree walker that analyzes the AST, marks which nodes and subtrees are * anchored at the start and/or end, and determines which subtrees are * unsatisfiable due to start anchoring. 
@@ -600,7 +921,7 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n) case AST_ANCHOR_START: /* * If it's not possible to get here without consuming - * any input and there's a start anchor, the regex is + * any input and there's a start anchor, the subtree is * inherently unsatisfiable. */ set_flags(n, AST_FLAG_ANCHORED_START); @@ -630,6 +951,9 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n) set_flags(n, AST_FLAG_ANCHORED_END); if (n->u.anchor.is_end_nl && !(env->re_flags & RE_ANCHORED)) { set_flags(n, AST_FLAG_END_NL); + if (env->newline_after_end_anchor_state == NAEAS_NONE) { + env->newline_after_end_anchor_state = NAEAS_WOULD_MATCH_ONCE; + } } break; @@ -642,6 +966,15 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n) * These are the types that actually consume input. */ case AST_EXPR_LITERAL: + if (n->u.literal.c == '\n' && + (env->newline_after_end_anchor_state == NAEAS_WOULD_MATCH_ONCE)) { + LOG(3 - LOG_ANCHORING, + "%s: LITERAL: rejecting non-optional newline match after $ as unsupported\n", + __func__); + set_flags(n, AST_FLAG_UNSATISFIABLE); + return AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE; + } + break; case AST_EXPR_CODEPOINT: case AST_EXPR_RANGE: break; /* handled outside switch/case */ @@ -673,6 +1006,13 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n) if (res != AST_ANALYSIS_OK && res != AST_ANALYSIS_UNSATISFIABLE) { /* unsat is handled below */ + if (res == AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE) { + + /* FIXME: check this */ + assert(child->flags & AST_FLAG_UNSATISFIABLE); + + set_flags(n, AST_FLAG_UNSATISFIABLE); + } return res; } @@ -703,12 +1043,11 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n) env->past_always_consuming = 1; } + env->newline_after_end_anchor_state = child_env.newline_after_end_anchor_state; } /* flow ANCHORED_START and ANCHORED_END flags upward */ { - int after_always_consumes = 0; - for (i = 0; i < 
n->u.concat.count; i++) { struct ast_expr *child = n->u.concat.n[i]; if (child->flags & AST_FLAG_ANCHORED_START) { @@ -716,20 +1055,11 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n) "%s: setting ANCHORED_START due to child %zu (%p)'s analysis\n", __func__, i, (void *)child); set_flags(n, AST_FLAG_ANCHORED_START); - - if (after_always_consumes) { - LOG(3 - LOG_ANCHORING, - "%s: setting %p and child %p UNSATISFIABLE due to ^-anchored child that always consumes input\n", - __func__, (void *)n, (void *)child); - set_flags(n, AST_FLAG_UNSATISFIABLE); - set_flags(child, AST_FLAG_UNSATISFIABLE); - } } if (always_consumes_input(child)) { LOG(3 - LOG_ANCHORING, "%s: child %zd always consumes input\n", __func__, i); - after_always_consumes = 1; } } } @@ -786,16 +1116,21 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n) for (i = 0; i < n->u.concat.count; i++) { struct ast_expr *child = n->u.concat.n[i]; if (after_end_anchor) { - if (always_consumes_input(child)) { + if (child->type == AST_EXPR_REPEAT + && (child->flags & AST_FLAG_UNSATISFIABLE) + && child->u.repeat.min == 0) { LOG(3 - LOG_ANCHORING, - "%s: after_end_anchor & ALWAYS_CONSUMES on child %p -> UNSATISFIABLE\n", - __func__, (void *)child); - set_flags(child, AST_FLAG_UNSATISFIABLE); + "%s: setting unsatisfiable {0,*} repeat after $ anchor to {0,0}\n", + __func__); + child->u.repeat.max = 0; } if (child->type == AST_EXPR_REPEAT - && (child->flags & AST_FLAG_UNSATISFIABLE) + && (child->flags & AST_FLAG_END_NL) && child->u.repeat.min == 0) { + LOG(3 - LOG_ANCHORING, + "%s: setting {0,*} repeat subtree matching \n after $ anchor to {0,0}\n", + __func__); child->u.repeat.max = 0; } } else if (!after_end_anchor @@ -846,6 +1181,10 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n) } else if (res == AST_ANALYSIS_OK) { all_set_past_always_consuming &= child_env.past_always_consuming; any_sat = 1; + } else if (res == 
AST_ANALYSIS_ERROR_UNSUPPORTED_CAPTURE + || res == AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE) { + assert(child->flags & AST_FLAG_UNSATISFIABLE); + continue; } else { return res; } @@ -883,7 +1222,7 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n) if (all_end_anchored) { LOG(3 - LOG_ANCHORING, "%s: ALT: all_end_anchored -> ANCHORED_END\n", __func__); - /* FIXME: AST_FLAG_END_NL: need to determine how this interacts + /* TODO: AST_FLAG_END_NL: need to determine how this interacts * with alt nodes. `^(?:(a)\z|(a)$)` */ set_flags(n, AST_FLAG_ANCHORED_END); } @@ -925,15 +1264,26 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n) return res; } - if (n->u.repeat.e->flags & AST_FLAG_ANCHORED_END && n->u.repeat.min > 0) { - /* FIXME: if repeating something that is always - * anchored at the end, repeat.max could be - * capped at 1, but I have not yet found any - * inputs where that change is necessary to - * produce a correct result. */ - LOG(3 - LOG_ANCHORING, - "%s: REPEAT: repeating ANCHORED_END subtree >0 times -> ANCHORED_END\n", __func__); - set_flags(n, n->u.repeat.e->flags & END_ANCHOR_FLAG_MASK); + /* If the child subtree is anchored at the start and/or end, then this + * node can be repeated at most once. 
 */ + const int child_is_anchored = n->u.repeat.e->flags & (AST_FLAG_ANCHORED_START | AST_FLAG_ANCHORED_END); + if (child_is_anchored) { + if (n->u.repeat.min >= 1) { + LOG(3 - LOG_ANCHORING, + "%s: REPEAT: copying anchor flags from child subtree since we cannot repeat it 0 times\n", __func__); + set_flags(n, n->u.repeat.e->flags & END_ANCHOR_FLAG_MASK); + } + + if (n->u.repeat.min > 1) { + LOG(3 - LOG_ANCHORING, + "%s: REPEAT: anchored, so clamping min repeat to at most once\n", __func__); + n->u.repeat.min = 1; + } + if (n->u.repeat.max > 1) { + LOG(3 - LOG_ANCHORING, + "%s: REPEAT: anchored, so clamping max repeat to at most once\n", __func__); + n->u.repeat.max = 1; + } } break; @@ -943,27 +1293,22 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n) /* This flows anchoring flags upward even when the node * is unsatisfiable, because that info can impact how * the node's unsatisfiability is handled. */ -#define PROPAGATE_CHILD_FLAGS(TAG, N, CHILD) \ - do { \ - struct ast_expr *child = CHILD; \ - if (child->flags & AST_FLAG_ANCHORED_START) { \ - set_flags(N, AST_FLAG_ANCHORED_START); \ - } \ - if (child->flags & AST_FLAG_ANCHORED_END) { \ - set_flags(N, AST_FLAG_ANCHORED_END); \ - } \ - if (res == AST_ANALYSIS_UNSATISFIABLE) { \ - LOG(3 - LOG_ANCHORING, \ - "%s: %s: setting UNSATISFIABLE due to unsatisfiable child\n", \ - __func__, TAG); \ - set_flags(N, AST_FLAG_UNSATISFIABLE); \ - } \ - if (res != AST_ANALYSIS_OK) { \ - return res; \ - } \ - } while(0) - - PROPAGATE_CHILD_FLAGS("GROUP", n, n->u.group.e); + struct ast_expr *child = n->u.group.e; + if (child->flags & AST_FLAG_ANCHORED_START) { + set_flags(n, AST_FLAG_ANCHORED_START); + } + if (child->flags & AST_FLAG_ANCHORED_END) { + set_flags(n, AST_FLAG_ANCHORED_END); + } + if (res == AST_ANALYSIS_UNSATISFIABLE) { + LOG(3 - LOG_ANCHORING, + "%s: GROUP: setting UNSATISFIABLE due to unsatisfiable child\n", + __func__); + set_flags(n, AST_FLAG_UNSATISFIABLE); + } + if (res != AST_ANALYSIS_OK) { 
+ return res; + } break; case AST_EXPR_SUBTRACT: @@ -974,6 +1319,15 @@ analysis_iter_anchoring(struct anchoring_env *env, struct ast_expr *n) if (n->u.subtract.a->flags & AST_FLAG_ANCHORED_END) { set_flags(n, n->u.subtract.a->flags & END_ANCHOR_FLAG_MASK); } + + if (env->newline_after_end_anchor_state == NAEAS_WOULD_MATCH_ONCE) { + LOG(3 - LOG_ANCHORING, + "%s: SUBTRACT: rejecting non-optional newline match after $ as unsupported\n", + __func__); + set_flags(n, AST_FLAG_UNSATISFIABLE); + return AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE; + } + if (res != AST_ANALYSIS_OK) { if (res == AST_ANALYSIS_UNSATISFIABLE) { set_flags(n, AST_FLAG_UNSATISFIABLE); @@ -1017,9 +1371,9 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n) enum ast_analysis_res res; /* - * Second pass, in reverse, specifically checking for end-anchored - * subtrees that are unsatisfiable because they are followed by - * nodes that always consume input. + * Second anchoring pass, in reverse, specifically checking for + * end-anchored subtrees that are unsatisfiable because they are + * followed by nodes that always consume input. * * Also check for subtrees that always consume input appearing * before a start anchor and mark them as unsatisfiable. @@ -1031,6 +1385,14 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n) switch (n->type) { case AST_EXPR_EMPTY: + if (env->before_start_anchor) { + /* Needed for cases like '()*^' matching "". 
 */ + LOG(3 - LOG_ANCHORING, "%s: skipping EMPTY before ^\n", __func__); + + set_flags(n, AST_FLAG_ANCHORED_START); + } + break; + case AST_EXPR_TOMBSTONE: break; @@ -1044,10 +1406,19 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n) break; case AST_ANCHOR_END: - /* should already be set during forward pass */ - assert(n->flags & AST_FLAG_ANCHORED_END); + /* Normally this will have been set during the forward pass, + * but if it's inside an unsatisfiable CONCAT node whose earlier + * children caused it to be rejected, forward analysis will not + * have reached it. */ + set_flags(n, AST_FLAG_ANCHORED_END); - if (env->followed_by_consuming) { + if (env->followed_by_consuming_newline) { + LOG(3 - LOG_ANCHORING, + "%s: RANGE: rejecting possible newline match after $ as unsupported\n", + __func__); + set_flags(n, AST_FLAG_UNSATISFIABLE); + return AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE; + } else if (env->followed_by_consuming) { LOG(3 - LOG_ANCHORING, "%s: END anchor & followed_by_consuming, setting UNSATISFIABLE\n", @@ -1113,6 +1484,7 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n) set_flags(n, AST_FLAG_UNSATISFIABLE); } } else if (res != AST_ANALYSIS_OK) { + set_flags(n, AST_FLAG_UNSATISFIABLE); return res; } @@ -1135,6 +1507,13 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n) __func__, (void *)child); env->before_start_anchor = 1; } + + if (!env->followed_by_consuming_newline && child_env.followed_by_consuming_newline) { + LOG(3 - LOG_ANCHORING, + "%s: setting followed_by_consuming_newline due to child %p's analysis\n", + __func__, (void *)child); + env->followed_by_consuming_newline = 1; + } } break; @@ -1143,6 +1522,7 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n) case AST_EXPR_ALT: { int any_sat = 0; int all_set_followed_by_consuming = 1; + int any_set_followed_by_consuming_newline = 0; int all_set_before_start_anchor = 1; 
assert(n->u.alt.count > 0); @@ -1168,7 +1548,14 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n) } else if (res == AST_ANALYSIS_OK) { all_set_followed_by_consuming &= child_env.followed_by_consuming; all_set_before_start_anchor &= child_env.before_start_anchor; + + any_set_followed_by_consuming_newline |= child_env.followed_by_consuming_newline; + any_sat = 1; + } else if (res == AST_ANALYSIS_ERROR_UNSUPPORTED_CAPTURE + || res == AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE) { + assert(child->flags & AST_FLAG_UNSATISFIABLE); + continue; } else { return res; } @@ -1181,6 +1568,13 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n) env->followed_by_consuming = 1; } + if (!env->followed_by_consuming_newline && any_set_followed_by_consuming_newline) { + LOG(3 - LOG_ANCHORING, + "%s: ALT: any_set_followed_by_consuming_newline -> setting env->followed_by_consuming_newline for feature PCRE rejection\n", + __func__); + env->followed_by_consuming_newline = 1; + } + if (!env->before_start_anchor && all_set_before_start_anchor) { LOG(3 - LOG_ANCHORING, "%s: ALT: all_set_before_start_anchor -> setting env->before_start_anchor\n", @@ -1205,6 +1599,8 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n) if (n->u.repeat.min == 0) { LOG(3 - LOG_ANCHORING, "%s: REPEAT: UNSATISFIABLE but can be repeated 0 times, ignoring\n", __func__); + /* Set the REPEAT node to repeat 0 times (to + * prune it) rather than marking it as unsatisfiable. 
*/ n->u.repeat.max = 0; /* skip */ break; } else if (n->u.repeat.min > 0) { @@ -1269,6 +1665,10 @@ analysis_iter_reverse_anchoring(struct anchoring_env *env, struct ast_expr *n) return AST_ANALYSIS_UNSATISFIABLE; } + if (n->flags & AST_FLAG_CAN_CONSUME && matches_newline(n)) { + env->followed_by_consuming_newline = 1; + } + return AST_ANALYSIS_OK; } @@ -1333,7 +1733,7 @@ assign_firsts(struct ast_expr *n) * subexpression is compiled, the links to the global self-loop * are created, which the REPEAT node then copies. * - * It probably makes sense to not go further + * FIXME: needs tests. It probably makes sense to not go further * here because the top layer of the repeated section * should only link to the global start once. */ @@ -1416,7 +1816,7 @@ assign_lasts(struct ast_expr *n) * subexpression is compiled, the links to the global self-loop * are created, which the REPEAT node then copies. * - * It probably makes sense to not go further + * FIXME: needs tests. It probably makes sense to not go further * here because the top layer of the repeated section * should only link to the global start once. 
*/ @@ -1437,6 +1837,112 @@ assign_lasts(struct ast_expr *n) } } +static enum ast_analysis_res +analysis_iter_captures(struct capture_env *env, struct ast_expr *n) +{ + enum ast_analysis_res res; + + switch (n->type) { + case AST_EXPR_EMPTY: + case AST_EXPR_TOMBSTONE: + break; + + case AST_EXPR_ANCHOR: + if (env->use_captures && n->u.anchor.type == AST_ANCHOR_END && !n->u.anchor.is_end_nl) { + set_flags(n, AST_FLAG_UNSATISFIABLE); + return AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE; + } + break; + + case AST_EXPR_LITERAL: + case AST_EXPR_CODEPOINT: + case AST_EXPR_RANGE: + break; + + case AST_EXPR_CONCAT: { + size_t i; + + for (i = 0; i < n->u.concat.count; i++) { + res = analysis_iter_captures(env, n->u.concat.n[i]); + if (res != AST_ANALYSIS_OK) { + return res; + } + } + + break; + } + + case AST_EXPR_ALT: { + size_t i; + + for (i = 0; i < n->u.alt.count; i++) { + res = analysis_iter_captures(env, n->u.alt.n[i]); + if (res != AST_ANALYSIS_OK) { + return res; + } + } + + break; + } + + case AST_EXPR_REPEAT: { + res = analysis_iter_captures(env, n->u.repeat.e); + if (res != AST_ANALYSIS_OK) { + return res; + } + +/* Set this to 1 when running the fuzzer, so that it ignores + * uninteresting failures from regexes like '.{1000000}' that use + * repetition to hit memory limits. + * + * This should be set by the build system when building for fuzzing. 
*/ +#ifndef FUZZING_LIMITS +#define FUZZING_LIMITS 0 +#endif + +#if FUZZING_LIMITS + if ((n->u.repeat.max != AST_COUNT_UNBOUNDED && n->u.repeat.max >= 10) + || (n->u.repeat.min >= 10)) { + fprintf(stderr, "%s: rejecting regex with {count} >= 10 (%u)\n", + __func__, n->u.repeat.max); + return AST_ANALYSIS_ERROR_MEMORY; + } +#endif + + break; + } + + case AST_EXPR_GROUP: + if (env->max_capture_id == AST_NO_MAX_CAPTURE_ID + || n->u.group.id > env->max_capture_id) { + env->max_capture_id = n->u.group.id; + } + + res = analysis_iter_captures(env, n->u.group.e); + if (res != AST_ANALYSIS_OK) { + return res; + } + break; + + case AST_EXPR_SUBTRACT: + res = analysis_iter_captures(env, n->u.subtract.a); + if (res != AST_ANALYSIS_OK) { + return res; + } + + res = analysis_iter_captures(env, n->u.subtract.b); + if (res != AST_ANALYSIS_OK) { + return res; + } + break; + + default: + assert(!"unreached"); + } + + return AST_ANALYSIS_OK; +} + enum ast_analysis_res ast_analysis(struct ast *ast, enum re_flags flags) { @@ -1449,14 +1955,26 @@ ast_analysis(struct ast *ast, enum re_flags flags) assert(ast->expr != NULL); /* - * First pass -- track nullability, clean up some artifacts from - * parsing. + * First pass -- track nullability, which subtrees can/always consume + * input, and clean up some artifacts from parsing. */ res = analysis_iter(ast->expr); if (res != AST_ANALYSIS_OK) { return res; } + /* Next pass, check for capture IDs. */ + { + struct capture_env env = { .max_capture_id = AST_NO_MAX_CAPTURE_ID }; + env.use_captures = !(flags & RE_NOCAPTURE); + + res = analysis_iter_captures(&env, ast->expr); + if (res != AST_ANALYSIS_OK) { + return res; + } + ast->max_capture_id = env.max_capture_id; + } + /* * Next pass: set anchoring, now that nullability info from * the first pass is in place and some other things have been @@ -1464,8 +1982,10 @@ ast_analysis(struct ast *ast, enum re_flags flags) * start anchors. 
*/ { - /* first anchoring analysis pass, sweeping forward */ - struct anchoring_env env = { .re_flags = flags }; + struct anchoring_env env = { + .re_flags = flags, + .newline_after_end_anchor_state = NAEAS_NONE, + }; res = analysis_iter_anchoring(&env, ast->expr); if (res != AST_ANALYSIS_OK) { return res; } @@ -1473,20 +1993,30 @@ ast_analysis(struct ast *ast, enum re_flags flags) res = analysis_iter_reverse_anchoring(&env, ast->expr); if (res != AST_ANALYSIS_OK) { return res; } - /* - * Next passes, mark all nodes in a first and/or last - * position. This is informed by the anchoring flags, so - * that needs to happen first. - */ - assign_firsts(ast->expr); - assign_lasts(ast->expr); - - ast->has_unanchored_start = (analysis_iter_unanchored_start(ast->expr) != UA_NO); - ast->has_unanchored_end = (analysis_iter_unanchored_end(ast->expr) != UA_NO); - LOG(2 - LOG_UNANCHORED_FLAGS, - "%s: has_unanchored_start %d, has_unanchored_end %d\n", - __func__, ast->has_unanchored_start, ast->has_unanchored_end); } + /* Next pass, mark some cases that need special handling + * due to repetition. For example, with cases like + * ^((x?))*$ the inner capture will always need to repeat + * one more time to match () after any 'x's. + * + * This needs to happen after the anchoring passes. */ + res = analysis_iter_repetition(ast->expr, NULL, 0, NULL); + if (res != AST_ANALYSIS_OK) { return res; } + + /* + * Next passes, mark all nodes in a first and/or last + * position. This is informed by the anchoring flags, so + * that needs to happen first. 
+ */ + assign_firsts(ast->expr); + assign_lasts(ast->expr); + + ast->has_unanchored_start = (analysis_iter_unanchored_start(ast->expr) != ANCHOR_ANALYSIS_ANCHORED); + ast->has_unanchored_end = (analysis_iter_unanchored_end(ast->expr) != ANCHOR_ANALYSIS_ANCHORED); + LOG(2 - LOG_UNANCHORED_FLAGS, + "%s: has_unanchored_start %d, has_unanchored_end %d\n", + __func__, ast->has_unanchored_start, ast->has_unanchored_end); + return res; } diff --git a/src/libre/ast_analysis.h b/src/libre/ast_analysis.h index f9ec8ebbb..5390cce57 100644 --- a/src/libre/ast_analysis.h +++ b/src/libre/ast_analysis.h @@ -30,7 +30,9 @@ enum ast_analysis_res { AST_ANALYSIS_UNSATISFIABLE, AST_ANALYSIS_ERROR_NULL = -1, - AST_ANALYSIS_ERROR_MEMORY = -2 + AST_ANALYSIS_ERROR_MEMORY = -2, + AST_ANALYSIS_ERROR_UNSUPPORTED_CAPTURE = -3, + AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE = -4 }; enum ast_analysis_res diff --git a/src/libre/ast_compile.c b/src/libre/ast_compile.c index 502faf8b4..b376aa144 100644 --- a/src/libre/ast_compile.c +++ b/src/libre/ast_compile.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -15,18 +16,25 @@ #include #include +#include #include #include #include +#include +#include + #include "class.h" #include "ast.h" #include "ast_compile.h" +#include "re_capvm_compile.h" +#include "libfsm/capture.h" #include "libfsm/internal.h" /* XXX */ #define LOG_LINKAGE 0 +#define LOG_TRAMPOLINE 0 #if LOG_LINKAGE #include "print.h" @@ -44,12 +52,12 @@ enum link_side { * Use the passed in start/end states (x and y) * * - LINK_GLOBAL - * Link to the global start/end state (env->start or env->end), + * Link to the global start/end state (env->start_inner or env->end_inner), * because this node has a ^ or $ anchor * * - LINK_GLOBAL_SELF_LOOP * Link to the unanchored self loop adjacent to the start/end - * state (env->start_any_loop or env->end_any_loop), because + * states (env->start_any_inner or env->end_any_inner), because * this node is in a FIRST or LAST position, 
but unanchored. */ enum link_types { @@ -58,7 +66,12 @@ enum link_types { LINK_GLOBAL_SELF_LOOP, }; +/* Call stack for AST -> NFA conversion. */ +#define DEF_COMP_STACK_CEIL 4 +#define NO_MAX_CAPTURE_IDS ((unsigned)-1) + struct comp_env { + const struct fsm_alloc *alloc; struct fsm *fsm; enum re_flags re_flags; struct re_err *err; @@ -72,21 +85,90 @@ struct comp_env { * Also, some states in a first/last context need to link * directly to the overall start/end states, either in * place of or along with the adjacent states. + * + * The inner start and end states are considered inside of + * match group 0, outer are not. */ - fsm_state_t start; - fsm_state_t end; + fsm_state_t start_outer; + fsm_state_t start_inner; fsm_state_t start_any_loop; + fsm_state_t start_any_inner; + int have_start_any_loop; + + /* States leading to the end, with and without an unanchored + * `.*` loop that consumes any trailing characters. */ + fsm_state_t end_inner; + fsm_state_t end_outer; + fsm_state_t end_nl_inner; fsm_state_t end_any_loop; - fsm_state_t end_nl; - int has_start_any_loop; + fsm_state_t end_any_inner; int has_end_any_loop; - int has_end_nl; + int has_end_nl_inner; + + /* bitset for active capture IDs */ + uint64_t *active_capture_ids; + long max_capture_id; /* upper bound */ + + /* Evaluation stack */ + struct comp_stack { + size_t ceil; + size_t depth; /* 0 -> empty */ + + struct comp_stack_frame { + struct ast_expr *n; + fsm_state_t x; + fsm_state_t y; + unsigned step; + + union { + struct { + fsm_state_t link; + } concat; + struct { + unsigned count; + } alt; + struct { + struct fsm_subgraph subgraph; + fsm_state_t na; + fsm_state_t nz; + } repeat; + } u; + } *frames; + } stack; }; static int -comp_iter(struct comp_env *env, - fsm_state_t x, fsm_state_t y, - struct ast_expr *n, const struct ast_expr *parent); +comp_iter(struct comp_env *env, fsm_state_t x, const struct ast *ast); + +static int +eval_stack_frame(struct comp_env *env); + +static int +eval_EMPTY(struct 
comp_env *env); +static int +eval_CONCAT(struct comp_env *env); +static int +eval_ALT(struct comp_env *env); +static int +eval_LITERAL(struct comp_env *env); +static int +eval_CODEPOINT(struct comp_env *env); +static int +eval_REPEAT(struct comp_env *env); +static int +eval_GROUP(struct comp_env *env); +static int +eval_ANCHOR(struct comp_env *env); +static int +eval_SUBTRACT(struct comp_env *env); +static int +eval_RANGE(struct comp_env *env); +static int +eval_TOMBSTONE(struct comp_env *env); + +static int +compile_capvm_program_for_stack_end_states(struct comp_env *env, + const struct ast *ast, uint32_t *prog_id); static int utf8(uint32_t cp, char c[]) @@ -193,6 +275,9 @@ expr_compile(struct ast_expr *e, enum re_flags flags, struct ast ast; ast.expr = e; + ast.max_capture_id = 0; + ast.has_unanchored_start = 0; + ast.has_unanchored_end = 0; return ast_compile(&ast, flags, opt, err); } @@ -208,11 +293,11 @@ addedge_literal(struct comp_env *env, enum re_flags re_flags, assert(to < env->fsm->statecount); if (re_flags & RE_ICASE) { - if (!fsm_addedge_literal(fsm, from, to, tolower((unsigned char) c))) { + if (!fsm_addedge_literal(fsm, from, to, (char)tolower((unsigned char) c))) { return 0; } - - if (!fsm_addedge_literal(fsm, from, to, toupper((unsigned char) c))) { + + if (!fsm_addedge_literal(fsm, from, to, (char)toupper((unsigned char) c))) { return 0; } } else { @@ -220,38 +305,50 @@ addedge_literal(struct comp_env *env, enum re_flags re_flags, return 0; } } - + return 1; } static int intern_start_any_loop(struct comp_env *env) { - fsm_state_t loop; + fsm_state_t loop, inner; assert(env != NULL); - if (env->has_start_any_loop) { + if (env->have_start_any_loop) { return 1; } assert(~env->re_flags & RE_ANCHORED); - assert(env->start < env->fsm->statecount); + assert(env->start_outer < env->fsm->statecount); if (!fsm_addstate(env->fsm, &loop)) { return 0; } + if (!fsm_addstate(env->fsm, &inner)) { + return 0; + } + +#if LOG_LINKAGE + fprintf(stderr, "%s: start_any: 
loop %d, inner: %d\n", __func__, loop, inner); +#endif + if (!fsm_addedge_any(env->fsm, loop, loop)) { return 0; } - if (!fsm_addedge_epsilon(env->fsm, env->start, loop)) { + if (!fsm_addedge_epsilon(env->fsm, env->start_outer, loop)) { + return 0; + } + if (!fsm_addedge_epsilon(env->fsm, loop, inner)) { return 0; } env->start_any_loop = loop; - env->has_start_any_loop = 1; + env->start_any_inner = inner; + env->have_start_any_loop = 1; return 1; } @@ -259,7 +356,7 @@ intern_start_any_loop(struct comp_env *env) static int intern_end_any_loop(struct comp_env *env) { - fsm_state_t loop; + fsm_state_t loop, inner; assert(env != NULL); @@ -268,21 +365,32 @@ intern_end_any_loop(struct comp_env *env) } assert(~env->re_flags & RE_ANCHORED); - assert(env->end < env->fsm->statecount); + assert(env->end_outer < env->fsm->statecount); if (!fsm_addstate(env->fsm, &loop)) { return 0; } + if (!fsm_addstate(env->fsm, &inner)) { + return 0; + } + +#if LOG_LINKAGE + fprintf(stderr, "%s: end_any: %d, inner: %d\n", __func__, loop, inner); +#endif if (!fsm_addedge_any(env->fsm, loop, loop)) { return 0; } - if (!fsm_addedge_epsilon(env->fsm, loop, env->end)) { + if (!fsm_addedge_epsilon(env->fsm, inner, loop)) { + return 0; + } + if (!fsm_addedge_epsilon(env->fsm, loop, env->end_outer)) { return 0; } env->end_any_loop = loop; + env->end_any_inner = inner; env->has_end_any_loop = 1; return 1; @@ -291,37 +399,39 @@ intern_end_any_loop(struct comp_env *env) static int intern_end_nl(struct comp_env *env) { - /* PCRE's end anchor $ matches a single optional newline. + /* PCRE's end anchor $ matches a single optional newline, + * which should exist outside of match group 0. * - * Intern states for a `\n?` that links to the global end. 
*/ + * Intern states for a `\n?` that links to */ assert(env != NULL); - if (env->has_end_nl) { + if (env->has_end_nl_inner) { return 1; } assert(~env->re_flags & RE_ANCHORED); assert(env->re_flags & RE_END_NL); - assert(env->end < env->fsm->statecount); + assert(~env->re_flags & RE_END_NL_DISABLE); + assert(env->end_outer < env->fsm->statecount); - fsm_state_t end_nl; - if (!fsm_addstate(env->fsm, &end_nl)) { + fsm_state_t inner; + if (!fsm_addstate(env->fsm, &inner)) { return 0; } #if LOG_LINKAGE - fprintf(stderr, "%s: end_nl: %d\n", __func__, end_nl); + fprintf(stderr, "%s: end_nl_inner: %d\n", __func__, inner); #endif - if (!fsm_addedge_epsilon(env->fsm, end_nl, env->end)) { + if (!fsm_addedge_epsilon(env->fsm, inner, env->end_outer)) { return 0; } - if (!fsm_addedge_literal(env->fsm, end_nl, env->end, (char)'\n')) { + if (!fsm_addedge_literal(env->fsm, inner, env->end_outer, (char)'\n')) { return 0; } - env->end_nl = end_nl; - env->has_end_nl = 1; + env->end_nl_inner = inner; + env->has_end_nl_inner = 1; return 1; } @@ -338,8 +448,8 @@ can_have_backward_epsilon_edge(const struct ast_expr *e) return 0; case AST_EXPR_SUBTRACT: - /* XXX: not sure */ - return 1; + /* Single character class */ + return 0; case AST_EXPR_REPEAT: /* 0 and 1 don't have backward epsilon edges */ @@ -397,21 +507,32 @@ can_skip_concat_state_and_epsilon(const struct ast_expr *l, return 0; } +static const struct ast_expr * +get_parent_node_from_stack(const struct comp_stack *stack) +{ + if (stack->depth < 2) { return NULL; } + return stack->frames[stack->depth - 2].n; +} + static enum link_types -decide_linking(struct comp_env *env, - struct ast_expr *n, const struct ast_expr *parent, enum link_side side) +decide_linking(struct comp_env *env, fsm_state_t x, fsm_state_t y, + struct ast_expr *n, enum link_side side) { assert(n != NULL); assert(env != NULL); + (void)x; + (void)y; + + struct comp_stack *stack = &env->stack; + /* If the regex is implicitly anchored and the dialect does * not 
support anchoring, linking is always top-down. */ if ((env->re_flags & RE_ANCHORED)) { return LINK_TOP_DOWN; } - /* parent can be NULL, if we're at the root node, but it must - * never be the same node. */ + const struct ast_expr *parent = get_parent_node_from_stack(stack); assert(parent != n); /* Note: any asymmetry here should be due to special cases @@ -479,183 +600,71 @@ print_linkage(enum link_types t) #define EPSILON(FROM, TO) \ assert((FROM) != (TO)); \ if (!fsm_addedge_epsilon(env->fsm, (FROM), (TO))) { return 0; } - + #define ANY(FROM, TO) \ if (!fsm_addedge_any(env->fsm, (FROM), (TO))) { return 0; } #define LITERAL(FROM, TO, C) \ if (!addedge_literal(env, n->re_flags, (FROM), (TO), ((char)C))) { return 0; } -#define RECURSE(FROM, TO, NODE, PARENT) \ - if (!comp_iter(env, (FROM), (TO), (NODE), (PARENT))) { return 0; } - -static int -comp_iter_repeated(struct comp_env *env, - fsm_state_t x, fsm_state_t y, - struct ast_expr *n) -{ - fsm_state_t a, b; - fsm_state_t na, nz; - unsigned i; - - assert(n->type == AST_EXPR_REPEAT); - const unsigned min = n->u.repeat.min; - const unsigned max = n->u.repeat.max; - struct ast_expr *e = n->u.repeat.e; - - assert(min <= max); - - if (min == 0 && max == 0) { /* {0,0} */ - EPSILON(x, y); - } else if (min == 0 && max == 1) { /* '?' */ - RECURSE(x, y, e, n); - EPSILON(x, y); - } else if (min == 1 && max == 1) { /* {1,1} */ - RECURSE(x, y, e, n); - } else if (min == 0 && max == AST_COUNT_UNBOUNDED) { /* '*' */ - NEWSTATE(na); - NEWSTATE(nz); - EPSILON(x,na); - EPSILON(nz,y); - - EPSILON(na, nz); - RECURSE(na, nz, e, n); - EPSILON(nz, na); - } else if (min == 1 && max == AST_COUNT_UNBOUNDED) { /* '+' */ - NEWSTATE(na); - NEWSTATE(nz); - EPSILON(x,na); - EPSILON(nz,y); - - RECURSE(na, nz, e, n); - EPSILON(nz, na); - } else { - /* - * Make new beginning/end states for the repeated section, - * build its NFA, and link to its head. 
- */ - - struct fsm_subgraph subgraph; - fsm_state_t tail; - - fsm_subgraph_start(env->fsm, &subgraph); - - NEWSTATE(na); - NEWSTATE(nz); - RECURSE(na, nz, e, n); - EPSILON(x, na); /* link head to repeated NFA head */ - - b = nz; /* set the initial tail */ - - /* can be skipped */ - if (min == 0) { - EPSILON(na, nz); - } - fsm_subgraph_stop(env->fsm, &subgraph); - tail = nz; - - if (max != AST_COUNT_UNBOUNDED) { - for (i = 1; i < max; i++) { - /* copies the original subgraph; need to set b to the - * original tail - */ - b = tail; +#define RETURN(ENV) comp_stack_pop(ENV) - if (!fsm_subgraph_duplicate(env->fsm, &subgraph, &b, &a)) { - return 0; - } +#define RECURSE(ENV, FROM, TO, NODE) \ + if (!comp_stack_push(ENV, (FROM), (TO), (NODE))) { return 0; } - EPSILON(nz, a); - - /* To the optional part of the repeated count */ - if (i >= min) { - EPSILON(nz, b); - } - - na = a; /* advance head for next duplication */ - nz = b; /* advance tail for concenation */ - } - } else { - for (i = 1; i < min; i++) { - /* copies the original subgraph; need to set b to the - * original tail - */ - b = tail; - - if (!fsm_subgraph_duplicate(env->fsm, &subgraph, &b, &a)) { - return 0; - } - - EPSILON(nz, a); - - na = a; /* advance head for next duplication */ - nz = b; /* advance tail for concenation */ - } - - /* back link to allow for infinite repetition */ - EPSILON(nz,na); - } - - /* tail to last repeated NFA tail */ - EPSILON(nz, y); - } - - return 1; -} +#define TAILCALL(ENV, FROM, TO, NODE) \ + comp_stack_tailcall(ENV, (FROM), (TO), (NODE)); static int -comp_iter(struct comp_env *env, - fsm_state_t x, fsm_state_t y, - struct ast_expr *n, const struct ast_expr *parent) +set_linking(struct comp_env *env, struct ast_expr *n, + enum link_types link_start, enum link_types link_end, + fsm_state_t *px, fsm_state_t *py) { - enum link_types link_start, link_end; - - if (n == NULL) { - return 1; - } - - link_start = decide_linking(env, n, parent, LINK_START); - link_end = 
decide_linking(env, n, parent, LINK_END); + fsm_state_t x = *px; + fsm_state_t y = *py; #if LOG_LINKAGE - fprintf(stderr, "%s: decide_linking %p: start ", __func__, (void *) n); + fprintf(stderr, "%s: decide_linking %p [%s]: start ", + __func__, (void *) n, ast_node_type_name(n->type)); print_linkage(link_start); fprintf(stderr, ", end "); print_linkage(link_end); fprintf(stderr, ", x %d, y %d\n", x, y); #else (void) print_linkage; + (void)n; #endif switch (link_start) { case LINK_TOP_DOWN: break; case LINK_GLOBAL: - x = env->start; + x = env->start_inner; break; case LINK_GLOBAL_SELF_LOOP: if (!intern_start_any_loop(env)) { return 0; } - assert(env->has_start_any_loop); + assert(env->have_start_any_loop); - x = env->start_any_loop; + x = env->start_any_inner; break; default: - assert(!"unreachable"); + assert(!"match fail"); /* these should be mutually exclusive now */ } switch (link_end) { case LINK_TOP_DOWN: break; case LINK_GLOBAL: - if (env->re_flags & RE_END_NL && (n->flags & AST_FLAG_END_NL)) { + if (env->re_flags & RE_END_NL && !(env->re_flags & RE_END_NL_DISABLE) + && (n->flags & AST_FLAG_END_NL)) { if (!intern_end_nl(env)) { return 0; } - y = env->end_nl; + y = env->end_nl_inner; } else { - y = env->end; + y = env->end_inner; } break; case LINK_GLOBAL_SELF_LOOP: @@ -664,243 +673,784 @@ comp_iter(struct comp_env *env, } assert(env->has_end_any_loop); - y = env->end_any_loop; + y = env->end_any_inner; break; default: - assert(!"unreachable"); + assert(!"match fail"); /* these should be mutually exclusive now */ } #if LOG_LINKAGE fprintf(stderr, " ---> x: %d, y: %d\n", x, y); #endif + *px = x; + *py = y; + return 1; +} - switch (n->type) { - case AST_EXPR_EMPTY: - /* skip these, when possible */ - EPSILON(x, y); - break; +static void +comp_stack_pop(struct comp_env *env) +{ + assert(env->stack.depth > 0); + env->stack.depth--; +} - case AST_EXPR_CONCAT: - { - fsm_state_t curr_x; - size_t i; +static int +comp_stack_push(struct comp_env *env, fsm_state_t x, 
fsm_state_t y, struct ast_expr *n) +{ + struct comp_stack *stack = &env->stack; + assert(n != NULL); - const size_t count = n->u.concat.count; + if (stack->depth == stack->ceil) { + const size_t nceil = 2*stack->ceil; + struct comp_stack_frame *nframes = f_realloc(env->alloc, + stack->frames, nceil * sizeof(stack->frames[0])); +#if LOG_LINKAGE || LOG_TRAMPOLINE + fprintf(stderr, "comp_stack_push: reallocating comp_stack, %zu -> %zu frames\n", + stack->ceil, nceil); +#endif + if (nframes == NULL) { + return 0; + } + stack->ceil = nceil; + stack->frames = nframes; + } - curr_x = x; + assert(stack->depth < stack->ceil); - assert(count >= 1); + struct comp_stack_frame *sf = &stack->frames[stack->depth]; + memset(sf, 0x00, sizeof(*sf)); + sf->n = n; + sf->x = x; + sf->y = y; - if (!fsm_addstate_bulk(env->fsm, count - 1)) { - return 0; - } + stack->depth++; + return 1; +} - for (i = 0; i < count; i++) { - struct ast_expr *curr = n->u.concat.n[i]; +static void +comp_stack_tailcall(struct comp_env *env, + fsm_state_t x, fsm_state_t y, struct ast_expr *n) +{ + struct comp_stack *stack = &env->stack; - /* If a subtree is unsatisfiable but also nullable, ignore it. */ - const enum ast_flags nullable_and_unsat = AST_FLAG_NULLABLE - | AST_FLAG_UNSATISFIABLE; - if ((curr->flags & nullable_and_unsat) == nullable_and_unsat) { - /* if necessary, link the end */ - if (i == count - 1) { - EPSILON(curr_x, y); - } - continue; - } + assert(stack->depth > 0); - struct ast_expr *next = i == count - 1 - ? NULL - : n->u.concat.n[i + 1]; + /* Replace current stack frame. 
*/ + struct comp_stack_frame *sf = &stack->frames[stack->depth - 1]; + memset(sf, 0x00, sizeof(*sf)); + sf->n = n; + sf->x = x; + sf->y = y; +} - fsm_state_t z; - if (i + 1 < count) { - if (!fsm_addstate(env->fsm, &z)) { - return 0; - } -#if LOG_LINKAGE - fprintf(stderr, "%s: added state z %d\n", __func__, z); -#endif - } else { - z = y; /* connect to right parent to close off subtree */ - } +#define JUST_ONE_PROG 1 - /* - * If nullable, add an extra state & epsilion as a one-way gate - */ - if (!can_skip_concat_state_and_epsilon(curr, next)) { - fsm_state_t diode; +static int +comp_iter(struct comp_env *env, + fsm_state_t x, const struct ast *ast) +{ + int res = 1; + assert(ast != NULL); + assert(ast->expr != NULL); - NEWSTATE(diode); - EPSILON(curr_x, diode); - curr_x = diode; - } + struct comp_stack_frame *frames = NULL; + uint64_t *active_capture_ids = NULL; + const bool use_captures = (env->re_flags & RE_NOCAPTURE) == 0; - RECURSE(curr_x, z, curr, n); + frames = f_calloc(env->alloc, + DEF_COMP_STACK_CEIL, sizeof(env->stack.frames[0])); + if (frames == NULL) { + goto alloc_fail; + } - curr_x = z; + { + const size_t capture_id_words = (env->max_capture_id == AST_NO_MAX_CAPTURE_ID) + ? 1 /* do non-zero allocation to silence EFENCE */ + : ((env->max_capture_id)/64 + 1); + active_capture_ids = f_calloc(env->alloc, capture_id_words, + sizeof(active_capture_ids[0])); + if (active_capture_ids == NULL) { + goto alloc_fail; } + } - break; + /* Add inner and outer end states. Like start_outer and start_inner, + * these represent the boundary between match group 0 (inner) and + * states outside it (the unanchored end loop). 
*/ + if (!fsm_addstate(env->fsm, &env->end_inner)) { + goto alloc_fail; + } + if (!fsm_addstate(env->fsm, &env->end_outer)) { + goto alloc_fail; + } + if (!fsm_addedge_epsilon(env->fsm, env->end_inner, env->end_outer)) { + goto alloc_fail; } - case AST_EXPR_ALT: - { - size_t i; + fsm_setend(env->fsm, env->end_outer, 1); - const size_t count = n->u.alt.count; +#if LOG_LINKAGE + fprintf(stderr, "end: outer %d, inner %d\n", + env->end_outer, env->end_inner); +#endif - assert(count >= 1); +#if LOG_TRAMPOLINE + fprintf(stderr, "%s: x %d, y %d\n", __func__, x, env->end_inner); +#endif - for (i = 0; i < count; i++) { - /* skip unsatisfiable ALT subtrees */ - if (n->u.alt.n[i]->flags & AST_FLAG_UNSATISFIABLE) { + env->stack.ceil = DEF_COMP_STACK_CEIL; + env->stack.depth = 1; + env->stack.frames = frames; + env->active_capture_ids = active_capture_ids; + + { /* set up the first stack frame */ + struct comp_stack_frame *sf = &env->stack.frames[0]; + sf->n = ast->expr; + sf->x = x; + sf->y = env->end_inner; + sf->step = 0; + } + +#if JUST_ONE_PROG + uint32_t prog_id; + if (use_captures) { + if (!compile_capvm_program_for_stack_end_states(env, ast, &prog_id)) { + goto alloc_fail; + } + } +#endif + + /* evaluate call stack until termination */ + while (res && env->stack.depth > 0) { + if (!eval_stack_frame(env)) { +#if LOG_TRAMPOLINE + fprintf(stderr, "%s: res -> 0\n", __func__); +#endif + res = 0; + break; + } + } + + if (use_captures && res && env->max_capture_id != AST_NO_MAX_CAPTURE_ID) { + /* Set the active captures on the end state. */ + for (unsigned i = 0; i <= (unsigned)env->max_capture_id; i++) { + if (!u64bitset_get(env->active_capture_ids, i)) { continue; } + if (!fsm_capture_set_active_for_end(env->fsm, i, env->end_outer)) { + goto alloc_fail; + } + } - /* - * CONCAT handles adding extra states and - * epsilons when necessary, so there isn't much - * more to do here. 
- */ - RECURSE(x, y, n->u.alt.n[i], n); +#if !JUST_ONE_PROG + uint32_t prog_id; + if (!compile_capvm_program_for_stack_end_states(env, stack, ast, &prog_id)) { + goto alloc_fail; + } +#endif + +#if LOG_TRAMPOLINE + fprintf(stderr, "%s: associated prog_id %u with state %d\n", + __func__, prog_id, stack->end_outer); +#endif + if (!fsm_capture_associate_program_with_end_state(env->fsm, + prog_id, env->end_outer)) { + goto alloc_fail; } - break; } - case AST_EXPR_LITERAL: - LITERAL(x, y, n->u.literal.c); - break; + f_free(env->alloc, env->stack.frames); + f_free(env->alloc, env->active_capture_ids); - case AST_EXPR_CODEPOINT: { - fsm_state_t a, b; - char c[4]; - int r, i; + return res; - r = utf8(n->u.codepoint.u, c); - if (!r) { - if (env->err != NULL) { - env->err->e = RE_EBADCP; - env->err->cp = n->u.codepoint.u; - } +alloc_fail: + /* TODO: set env->err to indicate alloc failure */ + if (frames != NULL) { + f_free(env->alloc, frames); + } + if (active_capture_ids != NULL) { + f_free(env->alloc, active_capture_ids); + } + return 0; +} + +static struct comp_stack_frame * +get_comp_stack_top(struct comp_env *env) +{ + struct comp_stack *stack = &env->stack; + assert(stack->depth > 0); + struct comp_stack_frame *sf = &stack->frames[stack->depth - 1]; + assert(sf->n != NULL); + return sf; +} + +static int +eval_stack_frame(struct comp_env *env) +{ + struct comp_stack_frame *sf = get_comp_stack_top(env); + +#if LOG_TRAMPOLINE + fprintf(stderr, "%s: depth %zu/%zu, type %s, step %u\n", __func__, + stack->depth, stack->ceil, ast_node_type_name(sf->n->type), sf->step); +#endif + + /* If this is the first time the trampoline has called this + * state, decide the linking. Some of the states below (such as + * AST_EXPR_CONCAT) can have multiple child nodes, so they will + * increment step and use it to resume where they left off as + * the trampoline returns execution to them. 
*/ + enum link_types link_end; + if (sf->step == 0) { /* entering state */ + enum link_types link_start; + link_start = decide_linking(env, sf->x, sf->y, sf->n, LINK_START); + link_end = decide_linking(env, sf->x, sf->y, sf->n, LINK_END); + if (!set_linking(env, sf->n, link_start, link_end, &sf->x, &sf->y)) { return 0; } + } + +#if LOG_TRAMPOLINE > 1 + fprintf(stderr, "%s: x %d, y %d\n", __func__, sf->x, sf->y); +#endif + + switch (sf->n->type) { + case AST_EXPR_EMPTY: + return eval_EMPTY(env); + case AST_EXPR_CONCAT: + return eval_CONCAT(env); + case AST_EXPR_ALT: + return eval_ALT(env); + case AST_EXPR_LITERAL: + return eval_LITERAL(env); + case AST_EXPR_CODEPOINT: + return eval_CODEPOINT(env); + case AST_EXPR_REPEAT: + return eval_REPEAT(env); + case AST_EXPR_GROUP: + return eval_GROUP(env); + case AST_EXPR_ANCHOR: + return eval_ANCHOR(env); + case AST_EXPR_SUBTRACT: + return eval_SUBTRACT(env); + case AST_EXPR_RANGE: + return eval_RANGE(env); + case AST_EXPR_TOMBSTONE: + return eval_TOMBSTONE(env); + default: + assert(!"unreached"); + return 0; + } +} - a = x; +static int +eval_EMPTY(struct comp_env *env) +{ + struct comp_stack_frame *sf = get_comp_stack_top(env); +#if LOG_LINKAGE + fprintf(stderr, "eval_EMPTY: step %u, x %d -> y %d\n", + sf->step, sf->x, sf->y); +#endif - for (i = 0; i < r; i++) { - if (i + 1 < r) { - NEWSTATE(b); - } else { - b = y; + EPSILON(sf->x, sf->y); + RETURN(env); + return 1; +} + +static int +eval_CONCAT(struct comp_env *env) +{ + struct comp_stack_frame *sf = get_comp_stack_top(env); + struct ast_expr *n = sf->n; + const size_t count = n->u.concat.count; + assert(count >= 1); + +#if LOG_LINKAGE + fprintf(stderr, "comp_iter: eval_CONCAT: x %d, y %d, step %d\n", + sf->x, sf->y, sf->step); +#endif + + if (sf->step == 0) { + sf->u.concat.link = sf->x; + } + + while (sf->step < count) { + fsm_state_t curr_x = sf->u.concat.link; + struct ast_expr *curr = n->u.concat.n[sf->step]; + + /* If a subtree is unsatisfiable but also nullable, 
ignore it. */ + const enum ast_flags nullable_and_unsat = AST_FLAG_NULLABLE + | AST_FLAG_UNSATISFIABLE; + if ((curr->flags & nullable_and_unsat) == nullable_and_unsat) { + sf->step++; + + /* if necessary, link the end */ + if (sf->step == count) { + EPSILON(curr_x, sf->y); } + return 1; + } - LITERAL(a, b, c[i]); + struct ast_expr *next = sf->step == count - 1 + ? NULL + : n->u.concat.n[sf->step + 1]; - a = b; + fsm_state_t z; + if (sf->step + 1 < count) { + if (!fsm_addstate(env->fsm, &z)) { + return 0; + } +#if LOG_LINKAGE + fprintf(stderr, "%s: added state z %d\n", __func__, z); +#endif + } else { + z = sf->y; /* connect to right parent to close off subtree */ } - break; - } +#if LOG_LINKAGE + fprintf(stderr, "%s: curr_x %d, z %d\n", + __func__, curr_x, z); +#endif - case AST_EXPR_REPEAT: /* - * REPEAT breaks out into its own function, because - * there are several special cases + * If nullable, add an extra state & epsilon as a one-way gate */ - if (!comp_iter_repeated(env, x, y, n)) { - return 0; + if (!can_skip_concat_state_and_epsilon(curr, next)) { + fsm_state_t diode; + + NEWSTATE(diode); + EPSILON(curr_x, diode); + curr_x = diode; +#if LOG_LINKAGE + fprintf(stderr, "comp_iter: added diode %d\n", diode); +#endif } - break; - case AST_EXPR_GROUP: - RECURSE(x, y, n->u.group.e, n); - break; +#if LOG_LINKAGE + fprintf(stderr, "comp_iter: recurse CONCAT[%u/%zu]: link %d, z %d\n", + sf->step, count, sf->u.concat.link, z); +#endif + /* Set the right side link, which will become the + * left side link for the next step (if any). 
*/ + sf->u.concat.link = z; + sf->step++; + RECURSE(env, curr_x, z, curr); + return 1; + } - case AST_EXPR_TOMBSTONE: - /* do not link -- intentionally pruned */ - break; + RETURN(env); + return 1; +} - case AST_EXPR_ANCHOR: - EPSILON(x, y); - break; +static int +eval_ALT(struct comp_env *env) +{ + struct comp_stack_frame *sf = get_comp_stack_top(env); + const size_t count = sf->n->u.alt.count; + assert(count >= 1); - case AST_EXPR_SUBTRACT: { - struct fsm *a, *b; - struct fsm *q; - enum re_flags re_flags; +#if LOG_LINKAGE + fprintf(stderr, "eval_ALT: step %u\n", sf->step); +#endif - re_flags = n->re_flags; + if (sf->step < count) { + struct ast_expr *n; - /* wouldn't want to reverse twice! */ - re_flags &= ~(unsigned)RE_REVERSE; + /* + * CONCAT handles adding extra states and + * epsilons when necessary, so there isn't much + * more to do here. + */ +#if LOG_LINKAGE + fprintf(stderr, "eval_ALT: recurse ALT[%u/%zu]: x %d, y %d\n", + sf->step, count, sf->x, sf->y); +#endif - a = expr_compile(n->u.subtract.a, re_flags, - fsm_getoptions(env->fsm), env->err); - if (a == NULL) { - return 0; + n = sf->n->u.alt.n[sf->step]; + assert(n != NULL); + sf->step++; /* RECURSE can realloc the stack and make sf stale. */ + + if (!(n->flags & AST_FLAG_UNSATISFIABLE)) { + RECURSE(env, sf->x, sf->y, n); } + return 1; + } - b = expr_compile(n->u.subtract.b, re_flags, - fsm_getoptions(env->fsm), env->err); - if (b == NULL) { - fsm_free(a); - return 0; + RETURN(env); + return 1; +} + +static int +eval_LITERAL(struct comp_env *env) +{ + struct comp_stack_frame *sf = get_comp_stack_top(env); + struct ast_expr *n = sf->n; +#if LOG_LINKAGE + fprintf(stderr, "%s: linking %d -> %d with literal '%c' (0x%02x)\n", + __func__, sf->x, sf->y, isprint(n->u.literal.c) ? 
n->u.literal.c : '.', + n->u.literal.c); +#endif + + LITERAL(sf->x, sf->y, n->u.literal.c); + + RETURN(env); + return 1; +} + +static int +eval_CODEPOINT(struct comp_env *env) +{ + struct comp_stack_frame *sf = get_comp_stack_top(env); + struct ast_expr *n = sf->n; + fsm_state_t a, b; + char c[4]; + int r, i; + + r = utf8(n->u.codepoint.u, c); + if (!r) { + if (env->err != NULL) { + env->err->e = RE_EBADCP; + env->err->cp = n->u.codepoint.u; } - q = fsm_subtract(a, b); - if (q == NULL) { - return 0; + return 0; + } + + a = sf->x; + + for (i = 0; i < r; i++) { + if (i + 1 < r) { + NEWSTATE(b); + } else { + b = sf->y; } + LITERAL(a, b, c[i]); + + a = b; + } + + RETURN(env); + return 1; +} + +static int +eval_REPEAT(struct comp_env *env) +{ + struct comp_stack_frame *sf = get_comp_stack_top(env); + fsm_state_t a, b; + unsigned i, min, max; + + assert(sf->n->type == AST_EXPR_REPEAT); + struct ast_expr_repeat *n = &sf->n->u.repeat; + + min = n->min; + max = n->max; + + assert(min <= max); + + if (min == 0 && max == 0) { /* {0,0} */ + EPSILON(sf->x, sf->y); + RETURN(env); + return 1; + } else if (min == 0 && max == 1) { /* '?' */ + EPSILON(sf->x, sf->y); + TAILCALL(env, sf->x, sf->y, n->e); + return 1; + } else if (min == 1 && max == 1) { /* {1,1} */ + TAILCALL(env, sf->x, sf->y, n->e); + return 1; + } else if (min == 0 && max == AST_COUNT_UNBOUNDED) { /* '*' */ + fsm_state_t na, nz; + NEWSTATE(na); + NEWSTATE(nz); + EPSILON(sf->x,na); + EPSILON(nz,sf->y); + + EPSILON(na, nz); + EPSILON(nz, na); + TAILCALL(env, na, nz, n->e); + return 1; + } else if (min == 1 && max == AST_COUNT_UNBOUNDED) { /* '+' */ + fsm_state_t na, nz; + NEWSTATE(na); + NEWSTATE(nz); + EPSILON(sf->x, na); + EPSILON(nz, sf->y); + + EPSILON(nz, na); + TAILCALL(env, na, nz, n->e); + return 1; + } else if (sf->step == 0) { /* - * Subtraction produces quite a mess. We could trim or minimise here - * while q is self-contained, which might work out better than doing it - * in the larger FSM after merge. 
I'm not sure if it works out better - * overall or not. + * Make new beginning/end states for the repeated section, + * build its NFA, and link to its head. */ - if (fsm_empty(q)) { - EPSILON(x, y); - break; + fsm_subgraph_start(env->fsm, &sf->u.repeat.subgraph); + + sf->step++; /* resume after RECURSE */ + NEWSTATE(sf->u.repeat.na); + NEWSTATE(sf->u.repeat.nz); + RECURSE(env, sf->u.repeat.na, sf->u.repeat.nz, n->e); + return 1; + } else { + fsm_state_t tail; + assert(sf->step == 1); + EPSILON(sf->x, sf->u.repeat.na); /* link head to repeated NFA head */ + + b = sf->u.repeat.nz; /* set the initial tail */ + + /* can be skipped */ + if (min == 0) { + EPSILON(sf->u.repeat.na, sf->u.repeat.nz); } + fsm_subgraph_stop(env->fsm, &sf->u.repeat.subgraph); + tail = sf->u.repeat.nz; - if (!fsm_unionxy(env->fsm, q, x, y)) { - return 0; + if (max != AST_COUNT_UNBOUNDED) { + for (i = 1; i < max; i++) { + /* copies the original subgraph; need to set b to the + * original tail + */ + b = tail; + + if (!fsm_subgraph_duplicate(env->fsm, &sf->u.repeat.subgraph, &b, &a)) { + return 0; + } + + EPSILON(sf->u.repeat.nz, a); + + /* To the optional part of the repeated count */ + if (i >= min) { + EPSILON(sf->u.repeat.nz, b); + } + + sf->u.repeat.na = a; /* advance head for next duplication */ + sf->u.repeat.nz = b; /* advance tail for concenation */ + } + } else { + for (i = 1; i < min; i++) { + /* copies the original subgraph; need to set b to the + * original tail + */ + b = tail; + + if (!fsm_subgraph_duplicate(env->fsm, &sf->u.repeat.subgraph, &b, &a)) { + return 0; + } + + EPSILON(sf->u.repeat.nz, a); + + sf->u.repeat.na = a; /* advance head for next duplication */ + sf->u.repeat.nz = b; /* advance tail for concenation */ + } + + /* back link to allow for infinite repetition */ + EPSILON(sf->u.repeat.nz, sf->u.repeat.na); } - break; + /* tail to last repeated NFA tail */ + EPSILON(sf->u.repeat.nz, sf->y); + RETURN(env); + return 1; } +} + +static void +set_active_capture_ids(struct 
comp_env *env, unsigned id) +{ +#if LOG_LINKAGE + fprintf(stderr, "set_active_capture_ids: id %u\n", id); +#endif + assert(env->active_capture_ids != NULL); + u64bitset_set(env->active_capture_ids, id); +} - case AST_EXPR_RANGE: { - unsigned int i; +static int +eval_GROUP(struct comp_env *env) +{ + struct comp_stack_frame *sf = get_comp_stack_top(env); - if (n->u.range.from.type != AST_ENDPOINT_LITERAL || n->u.range.to.type != AST_ENDPOINT_LITERAL) { - /* not yet supported */ - return 0; + if (env->re_flags & RE_NOCAPTURE) { + /* passthrough, disable captures */ + if (sf->step == 0) { + sf->step++; + RECURSE(env, sf->x, sf->y, sf->n->u.group.e); + } else { + RETURN(env); } + return 1; + } - assert(n->u.range.from.u.literal.c <= n->u.range.to.u.literal.c); + if (sf->step == 0) { + struct ast_expr *n = sf->n; + set_active_capture_ids(env, n->u.group.id); - if (n->u.range.from.u.literal.c == 0x00 && - n->u.range.to.u.literal.c == 0xff) - { - ANY(x, y); +#if LOG_LINKAGE + fprintf(stderr, "comp_iter: recurse GROUP: x %d, y %d\n", + sf->x, sf->y); +#endif + sf->step++; + RECURSE(env, sf->x, sf->y, n->u.group.e); + return 1; + } else { + assert(sf->step == 1); + + RETURN(env); + return 1; + } +} + +static int +eval_ANCHOR(struct comp_env *env) +{ + struct comp_stack_frame *sf = get_comp_stack_top(env); +#if 1 + +#if LOG_LINKAGE + fprintf(stderr, "%s: ignoring anchor node %p, epsilon %d -> %d\n", + __func__, (void *)sf->n, sf->x, sf->y); +#endif + EPSILON(sf->x, sf->y); +#else + switch (sf->n->u.anchor.type) { + case AST_ANCHOR_START: + if (!(sf->n->flags & AST_FLAG_FIRST)) { +#if LOG_LINKAGE + fprintf(stderr, "%s: ignoring START anchor in non-FIRST location\n", + __func__); +#endif + EPSILON(sf->x, sf->y); break; } - for (i = n->u.range.from.u.literal.c; i <= n->u.range.to.u.literal.c; i++) { - LITERAL(x, y, i); +#if LOG_LINKAGE + fprintf(stderr, "%s: START anchor %p epsilon-linking %d -> %d\n", + __func__, (void *)sf->n, env->start_inner, sf->y); +#endif + 
EPSILON(env->start_inner, sf->y); + break; + + case AST_ANCHOR_END: + if (!(sf->n->flags & AST_FLAG_LAST)) { +#if LOG_LINKAGE + fprintf(stderr, "%s: ignoring END anchor in non-LAST location\n", + __func__); +#endif + EPSILON(sf->x, sf->y); + break; } +#if LOG_LINKAGE + fprintf(stderr, "%s: END anchor %p epsilon-linking %d -> %d\n", + __func__, (void *)sf->n, sf->x, stack->end_inner); +#endif + EPSILON(sf->x, stack->end_inner); break; - } default: assert(!"unreached"); + return 0; } +#endif + RETURN(env); + return 1; +} + +static int +eval_SUBTRACT(struct comp_env *env) +{ + struct comp_stack_frame *sf = get_comp_stack_top(env); + + struct fsm *a, *b; + struct fsm *q; + enum re_flags re_flags = sf->n->re_flags; + + /* wouldn't want to reverse twice! */ + re_flags &= ~(unsigned)RE_REVERSE; + + /* Don't compile capture resolution programs again for the + * subtrees, just ignore capture behavior. */ + re_flags |= RE_NOCAPTURE; + + a = expr_compile(sf->n->u.subtract.a, re_flags, + fsm_getoptions(env->fsm), env->err); + if (a == NULL) { + return 0; + } + + b = expr_compile(sf->n->u.subtract.b, re_flags, + fsm_getoptions(env->fsm), env->err); + if (b == NULL) { + fsm_free(a); + return 0; + } + + q = fsm_subtract(a, b); + if (q == NULL) { + return 0; + } + + /* + * Subtraction produces quite a mess. We could trim or minimise here + * while q is self-contained, which might work out better than doing it + * in the larger FSM after merge. I'm not sure if it works out better + * overall or not. 
+ */ + + if (fsm_empty(q)) { + EPSILON(sf->x, sf->y); + RETURN(env); + return 1; + } + + if (!fsm_unionxy(env->fsm, q, sf->x, sf->y)) { + return 0; + } + + RETURN(env); + return 1; +} + +static int +eval_RANGE(struct comp_env *env) +{ + struct comp_stack_frame *sf = get_comp_stack_top(env); + struct ast_expr *n = sf->n; + unsigned int i; + + if (n->u.range.from.type != AST_ENDPOINT_LITERAL || n->u.range.to.type != AST_ENDPOINT_LITERAL) { + /* not yet supported */ + return 0; + } + + assert(n->u.range.from.u.literal.c <= n->u.range.to.u.literal.c); + + if (n->u.range.from.u.literal.c == 0x00 && + n->u.range.to.u.literal.c == 0xff) + { + ANY(sf->x, sf->y); + RETURN(env); + return 1; + } + + for (i = n->u.range.from.u.literal.c; i <= n->u.range.to.u.literal.c; i++) { + LITERAL(sf->x, sf->y, i); + } + + RETURN(env); + return 1; +} + +static int +eval_TOMBSTONE(struct comp_env *env) +{ + /* do not link -- intentionally pruned */ + (void)env; + RETURN(env); return 1; } @@ -909,6 +1459,8 @@ comp_iter(struct comp_env *env, #undef NEWSTATE #undef LITERAL #undef RECURSE +#undef RETURN +#undef TAILCALL struct fsm * ast_compile(const struct ast *ast, @@ -916,40 +1468,64 @@ ast_compile(const struct ast *ast, const struct fsm_options *opt, struct re_err *err) { - fsm_state_t x, y; + /* Start states inside and outside of match group 0, + * which represents the entire matched input, but does not + * include the implied /.*?/ loop at the start or end when + * a regex is unanchored. */ + fsm_state_t start_outer, start_inner; struct fsm *fsm; assert(ast != NULL); +#if LOG_LINKAGE + ast_print_tree(stderr, opt, re_flags, ast); +#endif + fsm = fsm_new(opt); if (fsm == NULL) { return NULL; } - if (!fsm_addstate(fsm, &x)) { + /* TODO: move these to the call stack, for symmetry? + * Or possibly combine comp_env and stack. 
*/ + if (!fsm_addstate(fsm, &start_outer)) { goto error; } - if (!fsm_addstate(fsm, &y)) { + if (!fsm_addstate(fsm, &start_inner)) { goto error; } - fsm_setstart(fsm, x); - fsm_setend(fsm, y, 1); + if (!fsm_addedge_epsilon(fsm, start_outer, start_inner)) { + goto error; + } + + fsm_setstart(fsm, start_outer); + +#if LOG_LINKAGE + fprintf(stderr, "start: outer %d, inner %d\n", + start_outer, start_inner); +#endif { struct comp_env env; memset(&env, 0x00, sizeof(env)); + env.alloc = fsm->opt->alloc; env.fsm = fsm; env.re_flags = re_flags; env.err = err; - env.start = x; - env.end = y; + env.start_inner = start_inner; + env.start_outer = start_outer; - if (!comp_iter(&env, x, y, ast->expr, NULL)) { + env.max_capture_id = ast->max_capture_id; + + if (!comp_iter(&env, start_inner, ast)) { + if (err != NULL && err->e == 0) { + err->e = RE_EBADGROUP; + } goto error; } } @@ -981,3 +1557,25 @@ ast_compile(const struct ast *ast, return NULL; } +static int +compile_capvm_program_for_stack_end_states(struct comp_env *env, + const struct ast *ast, uint32_t *prog_id) +{ + /* compile and save program in ^, associate its id w/ end state */ + enum re_capvm_compile_ast_res res; + struct capvm_program *prog; + res = re_capvm_compile_ast(env->alloc, + ast, env->re_flags, &prog); + if (res != RE_CAPVM_COMPILE_AST_OK) { + if (env->err != NULL && env->err->e == 0 && errno != 0) { + env->err->e = RE_EERRNO; + } + return 0; + } + + if (!fsm_capture_add_program(env->fsm, prog, prog_id)) { + return 0; + } + + return 1; +} diff --git a/src/libre/ast_rewrite.c b/src/libre/ast_rewrite.c index d05fc0a82..adb0690d2 100644 --- a/src/libre/ast_rewrite.c +++ b/src/libre/ast_rewrite.c @@ -146,7 +146,7 @@ compile_subexpr(struct ast_expr *e, enum re_flags flags) return 0; } - fsm = ast_compile(&ast, flags | RE_ANCHORED, NULL, NULL); + fsm = ast_compile(&ast, flags | RE_ANCHORED | RE_NOCAPTURE, NULL, NULL); if (fsm == NULL) { return 0; } diff --git a/src/libre/print/tree.c b/src/libre/print/tree.c index 
5d2f78691..58e1d6050 100644 --- a/src/libre/print/tree.c +++ b/src/libre/print/tree.c @@ -147,7 +147,9 @@ pp_iter(FILE *f, const struct fsm_options *opt, size_t indent, enum re_flags re_ case AST_EXPR_ALT: { size_t i, count = n->u.alt.count; - fprintf(f, "ALT (%u):\n", (unsigned)count); + fprintf(f, "ALT (%u):%s\n", + (unsigned)count, + n->u.alt.contains_empty_groups ? " (contains_empty_groups)" : ""); for (i = 0; i < count; i++) { pp_iter(f, opt, indent + 1 * IND, re_flags, n->u.alt.n[i]); } @@ -155,7 +157,9 @@ pp_iter(FILE *f, const struct fsm_options *opt, size_t indent, enum re_flags re_ } case AST_EXPR_LITERAL: - fprintf(f, "LITERAL '%c'\n", n->u.literal.c); + fprintf(f, "LITERAL '"); + print_char_or_esc(f, n->u.literal.c); + fprintf(f, "'\n"); break; case AST_EXPR_CODEPOINT: @@ -167,18 +171,27 @@ pp_iter(FILE *f, const struct fsm_options *opt, size_t indent, enum re_flags re_ fprintf_count(f, n->u.repeat.min); fprintf(f, ","); fprintf_count(f, n->u.repeat.max); - fprintf(f, "}\n"); + fprintf(f, "}%s\n", n->u.repeat.contains_empty_groups ? " (contains_empty_groups)" : ""); pp_iter(f, opt, indent + 1 * IND, re_flags, n->u.repeat.e); break; case AST_EXPR_GROUP: - fprintf(f, "GROUP %p: %u\n", (void *) n, n->u.group.id); + fprintf(f, "GROUP: %u%s\n", n->u.group.id, + n->u.group.repeated ? " (repeated)" : ""); pp_iter(f, opt, indent + 1 * IND, re_flags, n->u.group.e); break; case AST_EXPR_ANCHOR: assert(n->u.anchor.type == AST_ANCHOR_START || n->u.anchor.type == AST_ANCHOR_END); - fprintf(f, "ANCHOR %s\n", n->u.anchor.type == AST_ANCHOR_START ? "^" : "$"); + if (n->u.anchor.type == AST_ANCHOR_START) { + fprintf(f, "ANCHOR ^\n"); + } else if (n->u.anchor.type == AST_ANCHOR_END) { + assert(n->u.anchor.type == AST_ANCHOR_START || n->u.anchor.type == AST_ANCHOR_END); + fprintf(f, "ANCHOR $%s\n", + n->u.anchor.is_end_nl ? 
" (with \\n)" : ""); + } else { + assert(!"unreachable"); + } break; case AST_EXPR_SUBTRACT: diff --git a/src/libre/re.c b/src/libre/re.c index 6c423dc36..6474e2db6 100644 --- a/src/libre/re.c +++ b/src/libre/re.c @@ -37,12 +37,12 @@ re_dialect(enum re_dialect dialect) size_t i; static const struct dialect a[] = { - { RE_LIKE, parse_re_like, 0, RE_SINGLE | RE_ANCHORED }, - { RE_LITERAL, parse_re_literal, 0, RE_SINGLE | RE_ANCHORED }, - { RE_GLOB, parse_re_glob, 0, RE_SINGLE | RE_ANCHORED }, + { RE_LIKE, parse_re_like, 0, RE_SINGLE | RE_ANCHORED | RE_NOCAPTURE }, + { RE_LITERAL, parse_re_literal, 0, RE_SINGLE | RE_ANCHORED | RE_NOCAPTURE }, + { RE_GLOB, parse_re_glob, 0, RE_SINGLE | RE_ANCHORED | RE_NOCAPTURE }, { RE_NATIVE, parse_re_native, 0, 0 }, { RE_PCRE, parse_re_pcre, 0, RE_END_NL }, - { RE_SQL, parse_re_sql, 1, RE_SINGLE | RE_ANCHORED } + { RE_SQL, parse_re_sql, 1, RE_SINGLE | RE_ANCHORED | RE_NOCAPTURE } }; for (i = 0; i < sizeof a / sizeof *a; i++) { @@ -125,7 +125,15 @@ re_parse(enum re_dialect dialect, int (*getc)(void *opaque), void *opaque, if (res < 0) { ast_free(ast); - if (err != NULL) { err->e = RE_EERRNO; } + if (err != NULL) { + if (res == AST_ANALYSIS_ERROR_UNSUPPORTED_PCRE) { + err->e = RE_EUNSUPPPCRE; + } else if (res == AST_ANALYSIS_ERROR_UNSUPPORTED_CAPTURE) { + err->e = RE_EUNSUPCAPTUR; + } else if (err->e == RE_ESUCCESS) { + err->e = RE_EERRNO; + } + } return NULL; } diff --git a/src/libre/re_capvm_compile.c b/src/libre/re_capvm_compile.c new file mode 100644 index 000000000..068c00d32 --- /dev/null +++ b/src/libre/re_capvm_compile.c @@ -0,0 +1,1575 @@ +/* + * Copyright 2022 Scott Vokes + * + * See LICENCE for the full copyright terms. 
+ */ + +#include "re_capvm_compile.h" +#include "../libfsm/capture_vm.h" +#include "../libfsm/capture_vm_program.h" +#include "../libfsm/capture_vm_log.h" + +/* for EXPENSIVE_CHECKS */ +#include "adt/common.h" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include + +#include "ast.h" + +#define DEF_OPCODE_CEIL 8 +#define DEF_CHARCLASS_BUCKETS 8 +#define DEF_CHARCLASS_CEIL 4 +#define DEF_REPEATED_ALT_BACKPATCH_CEIL 1 +#define NO_BUCKET_ID ((uint32_t)-1) +#define NO_CAPTURE_ID ((uint32_t)-1) + +#define LOG_REPETITION_CASES 0 + +/* Placeholder markers for pending offset values (which would + * otherwise temporarily be uninitialized memory), chosen so + * they stand out visually in a debugger. */ +enum pending_offset { + PENDING_OFFSET_REPEAT_OPTIONAL_NEW = 11111111, + PENDING_OFFSET_REPEAT_OPTIONAL_CONT = 22222222, + PENDING_OFFSET_ALT_BACKPATCH_JMP = 33333333, + PENDING_OFFSET_ALT_BACKPATCH_NEW = 44444444, + PENDING_OFFSET_ALT_BACKPATCH_AFTER_REPEAT_PLUS = 55555555, +}; + +struct capvm_compile_env { + const struct fsm_alloc *alloc; + enum re_flags re_flags; + struct capvm_program *program; + + uint32_t max_capture_seen; + + /* Hash table for interning character classes. + * Doubles and rehashes when half full. */ + struct charclass_htab { + uint32_t bucket_count; + uint32_t buckets_used; + uint32_t ids_used; + struct charclass_htab_bucket { + uint32_t id; /* or NO_BUCKET_ID for unused */ + struct capvm_char_class bitset; + } *buckets; + } charclass_htab; + +#define DEF_REPEATED_GROUPS_CEIL 8 + /* Linked list of nodes used at compile time to compile regexes + * such as '^(a((b*)*)*)$' as if they were '^(a(?:b*)(()))$'. + * Since the inner body of the repeated subexpression with the + * capture groups can be empty, it will always repeat after + * its body matches any input. 
We move the group captures to + * the end of the repeated subexpression to explicitly represent + * them always capturing afterward, because otherwise the + * infinite loop protection skips them. */ + struct repeated_group_info { + /* Ancestor node that should emit the SAVE opcodes; can + * be either a REPEAT or ALT. */ + const struct ast_expr *outermost_ancestor; + size_t ceil; + size_t count; + const struct ast_expr **groups; + /* linked list */ + struct repeated_group_info *prev; + } *repeated_groups; +}; + +static bool +ensure_program_capacity(const struct fsm_alloc *alloc, + struct capvm_program *p, uint32_t count) +{ +#define STRESS_GROWING (EXPENSIVE_CHECKS && 1) + + const uint32_t capacity = p->used + count; + + if (capacity > p->ceil) { +#if STRESS_GROWING + const uint32_t nceil = (p->ceil + 1 < capacity + ? capacity : p->ceil + 1); +#else + const uint32_t nceil = (p->ceil == 0 + ? DEF_OPCODE_CEIL + : 2*p->ceil); + /* This should always be enough for any capacity + * requested during compilation. 
*/ + assert(nceil >= p->used + count); +#endif + LOG(3, "%s: growing %u -> %u (count %u)\n", + __func__, p->ceil, nceil, count); + struct capvm_opcode *nops = f_realloc(alloc, + p->ops, nceil * sizeof(p->ops[0])); + if (nops == NULL) { + return false; + } + +#if EXPENSIVE_CHECKS + for (size_t i = p->ceil; i < nceil; i++) { + /* out of range, will trigger asserts */ + nops[i].t = 'X'; + } +#endif + + p->ceil = nceil; + p->ops = nops; + } + return true; +} + +static void +check_program_for_invalid_labels(const struct capvm_program *p) +{ + for (uint32_t op_i = 0; op_i < p->used; op_i++) { + const struct capvm_opcode *op = &p->ops[op_i]; + switch (op->t) { + case CAPVM_OP_JMP: + assert(op->u.jmp != op_i); + break; + case CAPVM_OP_JMP_ONCE: + assert(op->u.jmp_once != op_i); + break; + case CAPVM_OP_SPLIT: + assert(op->u.split.greedy < p->used); + assert(op->u.split.greedy != op_i); + assert(op->u.split.nongreedy < p->used); + assert(op->u.split.nongreedy != op_i); + break; + + case CAPVM_OP_CHAR: + case CAPVM_OP_CHARCLASS: + case CAPVM_OP_MATCH: + case CAPVM_OP_SAVE: + case CAPVM_OP_ANCHOR: + break; + default: + assert(!"out of range"); + break; + } + } +} + +static uint32_t +get_program_offset(const struct capvm_program *p) +{ + assert(p->used < p->ceil); + +#if EXPENSIVE_CHECKS + struct capvm_opcode *op = &p->ops[p->used]; + op->t = 'X'; /* out of range */ +#endif + + return p->used; +} + +static uint32_t +reserve_program_opcode(struct capvm_program *p) +{ + assert(p->used < p->ceil); + const uint32_t res = p->used; + p->used++; + +#if EXPENSIVE_CHECKS + struct capvm_opcode *op = &p->ops[res]; + op->t = 'X'; /* out of range */ +#endif + + return res; +} + +static bool +grow_program_char_classes(const struct fsm_alloc *alloc, + struct capvm_program *p) +{ + const uint32_t nceil = (p->char_classes.ceil == 0 + ? 
DEF_CHARCLASS_CEIL + : 2*p->char_classes.ceil); + struct capvm_char_class *nsets = f_realloc(alloc, + p->char_classes.sets, nceil * sizeof(nsets[0])); + if (nsets == NULL) { + return false; + } + + p->char_classes.sets = nsets; + p->char_classes.ceil = nceil; + return true; +} + +static bool +intern_char_class(struct capvm_compile_env *env, + struct capvm_program *p, uint64_t chars[4], + uint32_t *id) +{ + LOG(5, "%s: used %u/%u\n", __func__, + env->charclass_htab.buckets_used, env->charclass_htab.bucket_count); + if (env->charclass_htab.buckets_used >= env->charclass_htab.bucket_count/2) { + const uint32_t ocount = env->charclass_htab.bucket_count; + const uint32_t ncount = (ocount == 0 + ? DEF_CHARCLASS_BUCKETS + : 2*env->charclass_htab.bucket_count); + LOG(3, "%s: growing from %u -> %u\n", __func__, ocount, ncount); + struct charclass_htab_bucket *nbuckets = + f_malloc(env->alloc, ncount * sizeof(nbuckets[0])); + if (nbuckets == NULL) { + return false; + } + for (uint32_t n_i = 0; n_i < ncount; n_i++) { + nbuckets[n_i].id = NO_BUCKET_ID; + } + + const uint32_t nmask = ncount - 1; + assert((ncount & nmask) == 0); + + struct charclass_htab_bucket *obuckets = env->charclass_htab.buckets; + for (uint32_t o_i = 0; o_i < ocount; o_i++) { + if (obuckets[o_i].id == NO_BUCKET_ID) { + continue; + } + const uint64_t h = hash_fnv1a_64((const uint8_t *)obuckets[o_i].bitset.octets, + sizeof(obuckets[o_i].bitset)); + + for (uint32_t n_i = 0; n_i < ncount; n_i++) { + const uint64_t b = (h + n_i) & nmask; + if (nbuckets[b].id == NO_BUCKET_ID) { + memcpy(&nbuckets[b].bitset, + &obuckets[o_i].bitset, + sizeof(obuckets[o_i].bitset)); + nbuckets[b].id = obuckets[o_i].id; + break; + } + } + } + f_free(env->alloc, obuckets); + env->charclass_htab.bucket_count = ncount; + env->charclass_htab.buckets = nbuckets; + } + + assert(env->charclass_htab.buckets_used < env->charclass_htab.bucket_count/2); + + const uint32_t count = env->charclass_htab.bucket_count; + const uint32_t mask = count 
- 1; + struct charclass_htab_bucket *buckets = env->charclass_htab.buckets; + + const uint64_t h = hash_fnv1a_64((const uint8_t *)chars, + sizeof(buckets[0].bitset)); + for (uint32_t i = 0; i < count; i++) { + const uint64_t b = (h + i) & mask; + LOG(5, "%s: buckets[%lu].id == %d\n", + __func__, b, buckets[b].id); + if (buckets[b].id == NO_BUCKET_ID) { + memcpy(&buckets[b].bitset, chars, sizeof(buckets[b].bitset)); + if (p->char_classes.count == p->char_classes.ceil) { + if (!grow_program_char_classes(env->alloc, p)) { + return false; + } + } + + memcpy(&p->char_classes.sets[p->char_classes.count], + chars, sizeof(buckets[b].bitset)); + p->char_classes.count++; + buckets[b].id = env->charclass_htab.ids_used; + env->charclass_htab.ids_used++; + env->charclass_htab.buckets_used++; + *id = buckets[b].id; + + return true; + } else if (0 == memcmp(chars, &buckets[b].bitset, sizeof(buckets[b].bitset))) { + *id = buckets[b].id; + return true; /* already present, reuse */ + } else { + /* collision */ + } + } + + assert(!"unreachable"); + return false; +} + +static void +dump_endpoint(const struct ast_endpoint *e) +{ + switch (e->type) { + case AST_ENDPOINT_LITERAL: + fprintf(stderr, "endpoint[LITERAL]: 0x%02x '%c'\n", + e->u.literal.c, + isprint(e->u.literal.c) ? 
e->u.literal.c : '.'); + break; + case AST_ENDPOINT_CODEPOINT: + fprintf(stderr, "endpoint[CODEPOINT]: 0x%x\n", + e->u.codepoint.u); + break; + case AST_ENDPOINT_NAMED: + assert(!"todo?"); + break; + } +} + +static void +dump_pos(const struct ast_pos *p) +{ + fprintf(stderr, "pos: byte %u, line %u, col %u\n", + p->byte, p->line, p->col); +} + +static bool +active_node(const struct ast_expr *n) +{ + assert(n != NULL); + + switch (n->type) { + case AST_EXPR_TOMBSTONE: + return false; + default: + return !(n->flags & AST_FLAG_UNSATISFIABLE); + } +} + +static bool +subtree_represents_character_class(const struct ast_expr *expr, uint64_t cc[4]) +{ + for (size_t i = 0; i < 4; i++) { + cc[i] = 0; + } + + switch (expr->type) { + case AST_EXPR_EMPTY: + /* empty set */ + return false; + + case AST_EXPR_LITERAL: + u64bitset_set(cc, (uint8_t)expr->u.literal.c); + return true; + + case AST_EXPR_RANGE: + { + const struct ast_endpoint *f = &expr->u.range.from; + const struct ast_endpoint *t = &expr->u.range.to; + if (f->type != AST_ENDPOINT_LITERAL + || t->type != AST_ENDPOINT_LITERAL) { + return false; + } + for (uint64_t c = (uint8_t)f->u.literal.c; c <= (uint8_t)t->u.literal.c; c++) { + u64bitset_set(cc, (uint8_t)c); + } + return true; + } + + case AST_EXPR_ALT: + { + /* union character classes from children */ + assert(expr->u.alt.count > 0); + for (size_t c_i = 0; c_i < expr->u.alt.count; c_i++) { + uint64_t child_cc[4]; + const struct ast_expr *child = expr->u.alt.n[c_i]; + if (subtree_represents_character_class(child, child_cc)) { + for (size_t cc_i = 0; cc_i < 4; cc_i++) { + cc[cc_i] |= child_cc[cc_i]; + } + } else { + return false; + } + } + return true; + } + + case AST_EXPR_SUBTRACT: + { + /* Only support AST_EXPR_SUBTRACT nodes where .a is a + * RANGE:0x00-0xff and .b is either a LITERAL, RANGE, EMPTY, + * or an ALT that itself represents a character class, */ + + const struct ast_expr *sub_a = expr->u.subtract.a; + if (sub_a->type != AST_EXPR_RANGE) { + return false; 
+ } + + const struct ast_endpoint *f = &sub_a->u.range.from; + const struct ast_endpoint *t = &sub_a->u.range.to; + if (f->type != AST_ENDPOINT_LITERAL || t->type != AST_ENDPOINT_LITERAL) { + return false; + } + + for (uint64_t i = 0; i < 256; i++) { + if (i >= (uint8_t)f->u.literal.c && i <= (uint8_t)f->u.literal.c) { + u64bitset_set(cc, i); + } + } + + for (size_t i = 0; i < 4; i++) { + cc[i] = ~(uint64_t)0; + } + + uint64_t neg_cc[4]; + if (expr->u.subtract.b->type == AST_EXPR_EMPTY) { + for (size_t cc_i = 0; cc_i < 4; cc_i++) { + neg_cc[cc_i] = (uint64_t)0; + } + } else if (subtree_represents_character_class(expr->u.subtract.b, neg_cc)) { + for (size_t cc_i = 0; cc_i < 4; cc_i++) { + cc[cc_i] &=~ neg_cc[cc_i]; + } + } else { + return false; + } + return true; + } + + default: + return false; + } +} + +static void +make_charclass_case_insensitive(uint64_t *cc) +{ + for (size_t i = 0; i < 256; i++) { + if (isalpha(i) && u64bitset_get(cc, i)) { + const char c = (char)i; + const uint64_t cl = (uint64_t)tolower(c); + const uint64_t cu = (uint64_t)toupper(c); + u64bitset_set(cc, cl); + u64bitset_set(cc, cu); + } + } +} + +static bool +can_safely_skip_JMP_ONCE(const struct ast_expr *expr) +{ + /* There are potentially cases where it's safe to skip the + * JMP_ONCE special case, which would save memory by not + * expanding the path an extra bit per iteration, but the + * criteria are subtle enough that it can probably wait. 
*/ + (void)expr; + return false; +} + +static bool +push_repeated_group_info(struct capvm_compile_env *env, const struct ast_expr *expr) +{ + LOG(3 - LOG_REPETITION_CASES, + "%s: setting env->repeated_groups.outermost_ancestor <- %p\n", + __func__, (void *)expr); + + assert(expr != NULL); + assert(expr->type == AST_EXPR_REPEAT || expr->type == AST_EXPR_ALT); + + struct repeated_group_info *rgi = f_calloc(env->alloc, 1, sizeof(*rgi)); + if (rgi == NULL) { + return false; + } + rgi->outermost_ancestor = expr; + rgi->prev = env->repeated_groups; + env->repeated_groups = rgi; + LOG(3 - LOG_REPETITION_CASES, + "%s: push rgi, allocated %p, prev %p\n", + __func__, (void *)rgi, (void *)rgi->prev); + return true; +} + +static void +pop_repeated_group_info(struct capvm_compile_env *env, const struct ast_expr *expr) +{ + assert(expr != NULL); + assert(expr->type == AST_EXPR_REPEAT || expr->type == AST_EXPR_ALT); + struct repeated_group_info *rgi = env->repeated_groups; + LOG(3 - LOG_REPETITION_CASES, + "%s: pop rgi, expecting %p, got %p\n", + __func__, (void *)expr, (void *)rgi->outermost_ancestor); + assert(rgi->outermost_ancestor == expr); + struct repeated_group_info *prev = rgi->prev; + LOG(3 - LOG_REPETITION_CASES, + "%s: pop rgi, freeing %p, prev %p\n", + __func__, (void *)rgi, (void *)prev); + + env->repeated_groups = prev; + if (rgi->groups != NULL) { + f_free(env->alloc, rgi->groups); + } + f_free(env->alloc, rgi); +} + +static bool +emit_repeated_groups(struct capvm_compile_env *env, struct capvm_program *p); + +static bool +capvm_compile_iter_save_groups_in_skipped_subtree(struct capvm_compile_env *env, + struct capvm_program *p, const struct ast_expr *expr); + +static bool +compile_kleene_star(struct capvm_compile_env *env, + struct capvm_program *p, const struct ast_expr *expr); + +static bool +capvm_compile_iter(struct capvm_compile_env *env, + struct capvm_program *p, const struct ast_expr *expr) +{ + LOG(4, "%s: expr %p, type %s, %u/%u used, re_flags 
0x%02x\n", + __func__, (void *)expr, ast_node_type_name(expr->type), + p->used, p->ceil, expr->re_flags); + + switch (expr->type) { + case AST_EXPR_EMPTY: + case AST_EXPR_TOMBSTONE: + break; + case AST_EXPR_CONCAT: + for (size_t i = 0; i < expr->u.concat.count; i++) { + /* append instructions from each consecutive node */ + const struct ast_expr *n = expr->u.concat.n[i]; + if (!capvm_compile_iter(env, p, n)) { return false; } + } + break; + case AST_EXPR_ALT: + { + if (!ensure_program_capacity(env->alloc, p, 1)) { + return false; + } + assert(expr->u.alt.count > 0); + + if (expr->u.alt.contains_empty_groups) { + if (!push_repeated_group_info(env, expr)) { + return false; + } + } + + /* If this ALT node represents a character class (such as a + * rewritten . character's ALT[0x00 - 0x09, 0x0b - 0xff] or + * a rewritten [abc-ef]'s ... , then produce the corresponding + * character class literal. The direct representation of the + * subtree would take several instructions and introduce + * unnecessary splits, increasing memory usage at runtime. 
*/ + uint64_t cc[4]; + if (subtree_represents_character_class(expr, cc)) { + const uint32_t pos = reserve_program_opcode(p); + struct capvm_opcode *op_cc = &p->ops[pos]; + op_cc->t = CAPVM_OP_CHARCLASS; + + if (expr->re_flags & RE_ICASE) { + make_charclass_case_insensitive(cc); + } + if (!intern_char_class(env, p, cc, &op_cc->u.charclass_id)) { + return false; + } + + if (expr->u.alt.contains_empty_groups) { + pop_repeated_group_info(env, expr); + } + break; + } + + uint32_t active_count = 0; + uint32_t last_active; + struct alt_flow_info { + bool is_active; + uint32_t backpatch; + }; + struct alt_flow_info *flow_info = f_calloc(env->alloc, + expr->u.alt.count, sizeof(flow_info[0])); + assert(flow_info != NULL); + + for (uint64_t i = 0; i < expr->u.alt.count; i++) { + const struct ast_expr *n = expr->u.alt.n[i]; + if (active_node(n)) { + last_active = i; + active_count++; + flow_info[i].is_active = true; + } + } + + /* If there are no children active this should terminate + * with an empty program. */ + LOG(3, "%s: active_count == %d\n", __func__, active_count); + if (active_count == 0) { + LOG(3, "%s: active_count == 0, skipping\n", __func__); + + for (uint64_t i = 0; i < expr->u.alt.count; i++) { + const struct ast_expr *n = expr->u.alt.n[i]; + capvm_compile_iter_save_groups_in_skipped_subtree(env, p, n); + if (n->flags & AST_FLAG_NULLABLE) { + break; + } + } + + f_free(env->alloc, flow_info); + if (expr->u.alt.contains_empty_groups) { + pop_repeated_group_info(env, expr); + } + + /* FIXME: May need distinct error case to not + * leak. There is currently no test reaching + * this and the fuzzer has not produced an input + * that reaches it -- unsatisfiability has probably + * already pruned subtrees that would get here. */ + return true; + } else if (active_count == 1) { + /* even if one of the later subtrees is active, an earlier + * subtree can still shadow it. 
*/ + bool shadowed = false; + + for (uint64_t i = 0; i < expr->u.alt.count; i++) { + if (i != last_active) { /* evaluate for empty groups */ + const struct ast_expr *n = expr->u.alt.n[i]; + capvm_compile_iter_save_groups_in_skipped_subtree(env, p, n); + if (n->flags & AST_FLAG_NULLABLE) { + shadowed = true; + break; + } + } + } + + if (!shadowed) { + LOG(5, "narrowing to last_active %u\n", last_active); + assert(last_active < expr->u.alt.count); + const struct ast_expr *n = expr->u.alt.n[last_active]; + if (!capvm_compile_iter(env, p, n)) { + return false; + } + f_free(env->alloc, flow_info); + if (expr->u.alt.contains_empty_groups) { + pop_repeated_group_info(env, expr); + } + break; + } else { + f_free(env->alloc, flow_info); + if (expr->u.alt.contains_empty_groups) { + pop_repeated_group_info(env, expr); + } + return true; /* may need distinct error case to not leak */ + } + } + + LOG(3, "%s: compiling AST_EXPR_ALT with %u active nodes, last_active %u\n", + __func__, active_count, last_active); + + /* note: binarized split: for a|b|c, treat this like "a else (b else c)", + * leading to generated code like: + * + * // note: trying each case in order, earlier cases are more greedy + * - split_cont j1 + * - split_new j2 + * j1: + * - + * - jmp pos_after_all // or split pos_after_all, PLUS_BACKPATCH, see below + * j2: + * - split_cont j3 + * - split_new j4 + * j3: + * - + * - jmp pos_after_all + * j4: + * //// DO NOT EMIT split instructions here, treat like a final else + * - + * // fall through to pos_after_all + * pos_after_all: + * + * + * When an ALT case: + * - is nullable (can match the empty string) + * - is the first nullable case (shadowing cases after) + * - is in a subtree of a REPEAT{1,inf} (+) node whose entire subtree is nullable + * then that case's `jmp pos_after_all` should be replaced with + * `split pos_after_all pos_after_repeat_backjmp`, which will need a special + * label for batch-patching by the REPEAT later. 
This is necessary for cases + * like '^(?:($|x))+$', where the `jmp pos_after_all` would lead to code after + * the ALT that has already been executed at the current input position. + * */ + for (uint32_t c_i = 0; c_i < expr->u.alt.count; c_i++) { + const bool is_final_else_case = c_i == last_active; + LOG(3, "%s: %p c_i %u/%zu, is_final_else_case %d\n", + __func__, (void *)expr, c_i, expr->u.alt.count, is_final_else_case); + if (!flow_info[c_i].is_active) { continue; } + + if (is_final_else_case) { + /* Just add the case for the child node and + * then fall through to pos_after_all. */ + const struct ast_expr *n = expr->u.alt.n[c_i]; + LOG(3, "%s: %p recursing...\n", __func__, (void *)expr); + if (!capvm_compile_iter(env, p, n)) { + return false; + } + LOG(3, "%s: %p recursing...done (final-else-case)\n", __func__, (void *)expr); + + struct repeated_group_info *rgi = env->repeated_groups; + LOG(3 - LOG_REPETITION_CASES, + "%s: ALT %p: contains_empty_groups: %d, outermost_ancestor: %p == %p ? %d\n", + __func__, (void *)expr, + expr->u.alt.contains_empty_groups, + (void *)(rgi ? rgi->outermost_ancestor : NULL), + (void *)expr, + (rgi ? rgi->outermost_ancestor == expr : 0)); + if (expr->u.alt.contains_empty_groups) { + assert(rgi != NULL); + LOG(3 - LOG_REPETITION_CASES, + "%s: outermost_ancestor match, count %zu\n", __func__, rgi->count); + if (!emit_repeated_groups(env, p)) { + return false; + } + } + } else { + if (!ensure_program_capacity(env->alloc, p, 2)) { + return false; + } + const uint32_t pos_split_before_case = reserve_program_opcode(p); + struct capvm_opcode *op_split_before = &p->ops[pos_split_before_case]; + op_split_before->t = CAPVM_OP_SPLIT; + + /* greedier branch: trying the next case, in order */ + op_split_before->u.split.greedy = get_program_offset(p); + + /* less greedy branch: moving on to the next case. 
+ * will backpatch .new to after this case's JMP later */ + op_split_before->u.split.nongreedy = PENDING_OFFSET_ALT_BACKPATCH_NEW; + + const struct ast_expr *n = expr->u.alt.n[c_i]; + LOG(3, "%s: %p recursing...\n", __func__, (void *)expr); + if (!capvm_compile_iter(env, p, n)) { + return false; + } + LOG(3, "%s: %p recursing...done (non-final)\n", __func__, (void *)expr); + + struct repeated_group_info *rgi = env->repeated_groups; + LOG(3 - LOG_REPETITION_CASES, + "%s: ALT %p: contains_empty_groups: %d, outermost_ancestor: %p == %p ? %d\n", + __func__, (void *)expr, expr->u.alt.contains_empty_groups, + (void *)(rgi ? rgi->outermost_ancestor : NULL), + (void *)expr, + (rgi ? rgi->outermost_ancestor == expr : 0)); + if (expr->u.alt.contains_empty_groups) { + assert(rgi != NULL); + LOG(3 - LOG_REPETITION_CASES, + "%s: outermost_ancestor match, count %zu\n", __func__, rgi->count); + if (!emit_repeated_groups(env, p)) { + return false; + } + } + + /* JMP or SPLIT, plus space after */ + if (!ensure_program_capacity(env->alloc, p, 2)) { + return false; + } + + /* Based on analysis, either emit a JMP or SPLIT. 
*/ + if (n->u.alt.nullable_alt_inside_plus_repeat) { + const uint32_t pos_split_after = reserve_program_opcode(p); + flow_info[c_i].backpatch = pos_split_after; + struct capvm_opcode *op_split_after = &p->ops[pos_split_after]; + op_split_after->t = CAPVM_OP_SPLIT; + op_split_after->u.split.greedy = PENDING_OFFSET_ALT_BACKPATCH_JMP; + op_split_after->u.split.nongreedy = PENDING_OFFSET_ALT_BACKPATCH_AFTER_REPEAT_PLUS; + } else { + const uint32_t pos_jmp_after = reserve_program_opcode(p); + flow_info[c_i].backpatch = pos_jmp_after; + struct capvm_opcode *op_jmp = &p->ops[pos_jmp_after]; + op_jmp->t = CAPVM_OP_JMP; + op_jmp->u.jmp = PENDING_OFFSET_ALT_BACKPATCH_JMP; + } + + /* refresh pointer after possible realloc */ + op_split_before = &p->ops[pos_split_before_case]; + + /* and the original split jumps to after + * this case's JMP */ + op_split_before->u.split.nongreedy = get_program_offset(p); + } + } + + /* Ensure there's space for the next instruction, and then + * set every case's JMP suffix to it. */ + if (!ensure_program_capacity(env->alloc, p, 1)) { + return false; + } + const uint32_t pos_after_all = get_program_offset(p); + + for (size_t i = 0; i < expr->u.alt.count - 1; i++) { + const bool is_final_else_case = i == last_active; + assert(flow_info[i].backpatch < p->used); + if (is_final_else_case || !flow_info[i].is_active) { + continue; + } + + struct capvm_opcode *op_patch = &p->ops[flow_info[i].backpatch]; + if (op_patch->t == CAPVM_OP_JMP) { + assert(op_patch->u.jmp == PENDING_OFFSET_ALT_BACKPATCH_JMP); + op_patch->u.jmp = pos_after_all; + } else if (op_patch->t == CAPVM_OP_SPLIT) { + assert(op_patch->u.split.greedy == PENDING_OFFSET_ALT_BACKPATCH_JMP); + op_patch->u.split.greedy = pos_after_all; + /* This will be patched by an ancestor repeat node after returning. 
*/ + assert(op_patch->u.split.greedy == PENDING_OFFSET_ALT_BACKPATCH_AFTER_REPEAT_PLUS); + } else { + assert(!"type mismatch"); + } + } + + f_free(env->alloc, flow_info); + if (expr->u.alt.contains_empty_groups) { + pop_repeated_group_info(env, expr); + } + break; + } + case AST_EXPR_LITERAL: + { + if (!ensure_program_capacity(env->alloc, p, 1)) { + return false; + } + const uint32_t pos = reserve_program_opcode(p); + struct capvm_opcode *op = &p->ops[pos]; + + if (expr->re_flags & RE_ICASE) { + uint64_t cc[4] = { 0 }; + u64bitset_set(cc, (uint8_t)expr->u.literal.c); + + op->t = CAPVM_OP_CHARCLASS; + make_charclass_case_insensitive(cc); + if (!intern_char_class(env, p, cc, &op->u.charclass_id)) { + return false; + } + } else { + op->t = CAPVM_OP_CHAR; + op->u.chr = (uint8_t)expr->u.literal.c; + } + break; + } + case AST_EXPR_CODEPOINT: + assert(!"not implemented, unreachable"); + break; + case AST_EXPR_REPEAT: + { + const unsigned min = expr->u.repeat.min; + const unsigned max = expr->u.repeat.max; + const struct ast_expr *e = expr->u.repeat.e; + + /* collect groups to emit */ + if (expr->u.repeat.contains_empty_groups) { + if (!push_repeated_group_info(env, expr)) { + return false; + } + } + + if (min == 1 && max == 1) { /* {1,1} */ + /* if repeating exactly once, just defer to subtree, + * but still do the repeated_group_info cleanup below */ + if (!capvm_compile_iter(env, p, e)) { + return false; + } + } else if (min == 0 && max == 1) { /* ? 
*/ + /* split l1, l2 + * l1: + * l2: */ + if (!ensure_program_capacity(env->alloc, p, 2)) { + return false; + } + + const uint32_t pos_split = reserve_program_opcode(p); + const uint32_t pos_l1 = get_program_offset(p); + + struct capvm_opcode *op_split = &p->ops[pos_split]; + op_split->t = CAPVM_OP_SPLIT; + op_split->u.split.greedy = pos_l1; + op_split->u.split.nongreedy = PENDING_OFFSET_REPEAT_OPTIONAL_NEW; + + if (!capvm_compile_iter(env, p, e)) { return false; } + + if (!ensure_program_capacity(env->alloc, p, 1)) { + return false; + } + op_split = &p->ops[pos_split]; /* refresh pointer */ + + const uint32_t after_expr = get_program_offset(p); + op_split->u.split.nongreedy = after_expr; + } else if (min == 0 && max == AST_COUNT_UNBOUNDED) { /* * */ + if (!compile_kleene_star(env, p, expr)) { + return false; + } + } else if (min == 1 && max == AST_COUNT_UNBOUNDED) { /* + */ + /* l1: + * split l1, l2 + * l2: */ + if (!ensure_program_capacity(env->alloc, p, 1)) { + return false; + } + const uint32_t pos_l1 = get_program_offset(p); + + if (!capvm_compile_iter(env, p, e)) { return false; } + + if (!ensure_program_capacity(env->alloc, p, 1)) { + return false; + } + + /* Only emit the backwards jump for repetition branching + * if the subtree added any instructions. */ + if (get_program_offset(p) != pos_l1) { + if (!ensure_program_capacity(env->alloc, p, 3)) { + return false; + } + const uint32_t pos_split = reserve_program_opcode(p); + const uint32_t pos_l2 = get_program_offset(p); + + struct capvm_opcode *op_split = &p->ops[pos_split]; + op_split->t = CAPVM_OP_SPLIT; + op_split->u.split.greedy = pos_l1; + op_split->u.split.nongreedy = pos_l2; + } + } else if (min == 0 && max == 0) { /* {0,0} */ + /* ignored, except any groups contained within that could match + * empty input still get emitted (unless unsatisfiable). 
*/ + if (e->flags & AST_FLAG_UNSATISFIABLE) { + LOG(3, "%s: repeat{0,0} && UNSATISFIABILE -> skipping\n", __func__); + break; + } + + /* Unreachable group captures still need to be counted, otherwise + * subsequent ones would get shifted down. */ + if (!capvm_compile_iter_save_groups_in_skipped_subtree(env, p, e)) { return false; } + break; + } else { /* other bounded count */ + /* repeat the minimum number of times */ + for (size_t i = 0; i < min; i++) { + if (!capvm_compile_iter(env, p, e)) { return false; } + } + + if (max == AST_COUNT_UNBOUNDED) { + /* A repeat of {x,inf} should be treated like + * (?:subtree){x} (?:subtree)* , where any numbered + * capture groups inside have the same group ID in + * both copies of the subtree. */ + if (!compile_kleene_star(env, p, expr)) { + return false; + } + } else { + /* then repeat up to the max as ? + * + * split_cont l1 + * split_new l2 + * l1: + * l2: */ + for (size_t i = min; i < max; i++) { + if (!ensure_program_capacity(env->alloc, p, 3)) { + return false; + } + + const uint32_t pos_split = reserve_program_opcode(p); + const uint32_t pos_l1 = get_program_offset(p); + + struct capvm_opcode *op_split = &p->ops[pos_split]; + op_split->t = CAPVM_OP_SPLIT; + op_split->u.split.greedy = pos_l1; + op_split->u.split.nongreedy = PENDING_OFFSET_REPEAT_OPTIONAL_NEW; + + if (!capvm_compile_iter(env, p, e)) { return false; } + + if (!ensure_program_capacity(env->alloc, p, 1)) { + return false; + } + op_split = &p->ops[pos_split]; /* refresh pointer */ + + const uint32_t after_expr = get_program_offset(p); + op_split->u.split.nongreedy = after_expr; + } + } + } + + struct repeated_group_info *rgi = env->repeated_groups; + LOG(3 - LOG_REPETITION_CASES, + "%s: REPEAT %p: contains_empty_groups: %d, outermost_ancestor: %p == %p ? %d\n", + __func__, (void *)expr, expr->u.repeat.contains_empty_groups, + (void *)(rgi ? rgi->outermost_ancestor : NULL), + (void *)expr, + (rgi ? 
rgi->outermost_ancestor == expr : 0)); + if (expr->u.repeat.contains_empty_groups + && rgi != NULL + && rgi->outermost_ancestor == expr) { + LOG(3 - LOG_REPETITION_CASES, + "%s: outermost_ancestor match, count %zu\n", __func__, rgi->count); + if (!emit_repeated_groups(env, p)) { + return false; + } + pop_repeated_group_info(env, expr); + } + + break; + } + case AST_EXPR_GROUP: + { + const uint32_t id = expr->u.group.id; + const int is_repeated = expr->u.group.repeated; + + /* If the group is nullable and repeated, then move its save + * instructions to the end, since the final iteration matching + * nothing will always clobber any earlier saves. This is a + * workaround for cases that would otherwise incorrectly be + * halted by infinite loop prevention at runtime. */ + if (is_repeated && ((expr->flags & AST_FLAG_NULLABLE) + || !(expr->flags & AST_FLAG_CAN_CONSUME))) { + + struct repeated_group_info *rgi = env->repeated_groups; + + LOG(3 - LOG_REPETITION_CASES, + "%s: checking repeated group %u (capvm_compile_iter recurse), parent %p\n", + __func__, id, (void *)(rgi ? rgi->outermost_ancestor : NULL)); + if (!capvm_compile_iter(env, p, expr->u.group.e)) { return false; } + LOG(3 - LOG_REPETITION_CASES, + "%s: checking repeated group %u (capvm_compile_iter done), parent %p\n", + __func__, id, (void *)(rgi ? rgi->outermost_ancestor : NULL)); + + /* don't emit these here, parent repeat node will add them after. */ + if (rgi && rgi->outermost_ancestor != NULL) { + if (rgi->count == rgi->ceil) { + const size_t nceil = (rgi->ceil == 0 + ? 
DEF_REPEATED_GROUPS_CEIL + : 2*rgi->ceil); + const struct ast_expr **ngroups = f_realloc(env->alloc, + rgi->groups, + nceil * sizeof(ngroups[0])); + if (ngroups == NULL) { + return false; + } + rgi->groups = ngroups; + rgi->ceil = nceil; + } + + LOG(3 - LOG_REPETITION_CASES, + "%s: adding group %u (%p) to outermost_ancestor %p\n", + __func__, id, (void *)expr, + (void *)rgi->outermost_ancestor); + rgi->groups[rgi->count] = expr; + rgi->count++; + } + } else { + if (!ensure_program_capacity(env->alloc, p, 1)) { + return false; + } + const uint32_t pos_start = reserve_program_opcode(p); + struct capvm_opcode *op = &p->ops[pos_start]; + op->t = CAPVM_OP_SAVE; /* save capture start */ + op->u.save = 2*id; + + if (!capvm_compile_iter(env, p, expr->u.group.e)) { return false; } + + if (!ensure_program_capacity(env->alloc, p, 1)) { + return false; + } + const uint32_t pos_end = reserve_program_opcode(p); + op = &p->ops[pos_end]; + op->t = CAPVM_OP_SAVE; /* save capture end */ + op->u.save = 2*id + 1; + } + + if (id > env->max_capture_seen || env->max_capture_seen == NO_CAPTURE_ID) { + env->max_capture_seen = id; + } + + break; + } + + case AST_EXPR_ANCHOR: + { + if (!ensure_program_capacity(env->alloc, p, 1)) { + return false; + } + const uint32_t pos = reserve_program_opcode(p); + struct capvm_opcode *op = &p->ops[pos]; + op->t = CAPVM_OP_ANCHOR; + op->u.anchor = (expr->u.anchor.type == AST_ANCHOR_START + ? 
CAPVM_ANCHOR_START : CAPVM_ANCHOR_END); + break; + } + case AST_EXPR_SUBTRACT: + { + uint64_t cc[4]; + for (size_t i = 0; i < 4; i++) { + cc[i] = ~(uint64_t)0; + } + if (subtree_represents_character_class(expr, cc)) { + if (!ensure_program_capacity(env->alloc, p, 1)) { + return false; + } + const uint32_t pos = reserve_program_opcode(p); + struct capvm_opcode *op_cc = &p->ops[pos]; + op_cc->t = CAPVM_OP_CHARCLASS; + + if (expr->re_flags & RE_ICASE) { + make_charclass_case_insensitive(cc); + } + + if (!intern_char_class(env, p, cc, &op_cc->u.charclass_id)) { + return false; + } + } else { + /* FIXME: should return UNSUPPORTED */ + assert(!"unreachable"); + return false; + } + break; + } + case AST_EXPR_RANGE: + { + uint64_t cc[4] = { 0 }; + if (!subtree_represents_character_class(expr, cc)) { + dump_endpoint(&expr->u.range.from); + dump_pos(&expr->u.range.start); + dump_endpoint(&expr->u.range.to); + dump_pos(&expr->u.range.end); + assert(!"unreachable"); + return false; + } + + if (!ensure_program_capacity(env->alloc, p, 1)) { + return false; + } + const uint32_t pos = reserve_program_opcode(p); + struct capvm_opcode *op = &p->ops[pos]; + + op->t = CAPVM_OP_CHARCLASS; + if (expr->re_flags & RE_ICASE) { + make_charclass_case_insensitive(cc); + } + + if (!intern_char_class(env, p, cc, &op->u.charclass_id)) { + return false; + } + break; + } + default: + assert(!"matchfail"); + } + + return true; +} + +static bool +compile_kleene_star(struct capvm_compile_env *env, + struct capvm_program *p, const struct ast_expr *expr) +{ + /* Note: min count may be > 0 because this is also + * used for unbounded repetition with a lower count, + * as in `a{3,}`, but in that case the {min} + * repetitions have already been handled by the caller. 
*/ + assert(expr && expr->type == AST_EXPR_REPEAT && + expr->u.repeat.max == AST_COUNT_UNBOUNDED); + + /* l1: split l2, l3 + * l2: + * jmp_once l1 OR jmp l1 + * l3: */ + if (!ensure_program_capacity(env->alloc, p, 2)) { + return false; + } + + const uint32_t pos_l1 = reserve_program_opcode(p); + const uint32_t pos_l2 = get_program_offset(p); + + struct capvm_opcode *op_split = &p->ops[pos_l1]; + op_split->t = CAPVM_OP_SPLIT; + op_split->u.split.greedy = PENDING_OFFSET_REPEAT_OPTIONAL_CONT; + op_split->u.split.nongreedy = PENDING_OFFSET_REPEAT_OPTIONAL_NEW; + + if (!capvm_compile_iter(env, p, expr->u.repeat.e)) { return false; } + + if (!ensure_program_capacity(env->alloc, p, 2)) { + return false; + } + + /* It's more expensive to always emit JMP_ONCE because it + * extends the path each iteration, so we could detect when + * it would be safe to use a JMP instead. */ + if (can_safely_skip_JMP_ONCE(expr)) { + const uint32_t pos_jmp = reserve_program_opcode(p); + struct capvm_opcode *op_jmp = &p->ops[pos_jmp]; + op_jmp->t = CAPVM_OP_JMP; + op_jmp->u.jmp = pos_l1; + } else { + const uint32_t pos_jmp_once = reserve_program_opcode(p); + struct capvm_opcode *op_jmp_once = &p->ops[pos_jmp_once]; + op_jmp_once->t = CAPVM_OP_JMP_ONCE; + op_jmp_once->u.jmp_once = pos_l1; + } + + const uint32_t pos_l3 = get_program_offset(p); + op_split = &p->ops[pos_l1]; /* refresh pointer */ + op_split->u.split.greedy = pos_l2; + op_split->u.split.nongreedy = pos_l3; + return true; +} + +static bool +emit_repeated_groups(struct capvm_compile_env *env, struct capvm_program *p) +{ + struct repeated_group_info *rgi = env->repeated_groups; + for (size_t i = 0; i < rgi->count; i++) { + const struct ast_expr *group = rgi->groups[i]; + assert(group->u.group.repeated); + const unsigned id = group->u.group.id; + LOG(3 - LOG_REPETITION_CASES, + "%s: checking %zu/%zu: group_id %u\n", + __func__, i, rgi->count, id); + + if (group->flags & (AST_FLAG_ANCHORED_START | AST_FLAG_ANCHORED_END)) { + /* if the 
otherwise empty group contains any anchors, + * then emit a subtree like (^)? so that its capture + * is only set when the anchors would match. */ + if (!ensure_program_capacity(env->alloc, p, 6)) { + return false; + } + + /* split l1, l2 + * l1: + * + * l2: save (start) + * save (end) + * */ + const uint32_t pos_split = reserve_program_opcode(p); + const uint32_t pos_l1 = get_program_offset(p); + + struct capvm_opcode *op_split = &p->ops[pos_split]; + op_split->t = CAPVM_OP_SPLIT; + op_split->u.split.greedy = pos_l1; + op_split->u.split.nongreedy = PENDING_OFFSET_REPEAT_OPTIONAL_NEW; + + if (group->flags & AST_FLAG_ANCHORED_START) { + const uint32_t pos_start = reserve_program_opcode(p); + struct capvm_opcode *op = &p->ops[pos_start]; + op->t = CAPVM_OP_ANCHOR; + op->u.anchor = CAPVM_ANCHOR_START; + } + + if (group->flags & AST_FLAG_ANCHORED_END) { + const uint32_t pos_end = reserve_program_opcode(p); + struct capvm_opcode *op = &p->ops[pos_end]; + op->t = CAPVM_OP_ANCHOR; + op->u.anchor = CAPVM_ANCHOR_END; + } + + const uint32_t pos_start = reserve_program_opcode(p); + struct capvm_opcode *op = &p->ops[pos_start]; + op->t = CAPVM_OP_SAVE; /* save capture start */ + op->u.save = 2*id; + + const uint32_t pos_end = reserve_program_opcode(p); + op = &p->ops[pos_end]; + op->t = CAPVM_OP_SAVE; /* save capture end */ + op->u.save = 2*group->u.group.id; + op->u.save = 2*id + 1; + + const uint32_t after_expr = get_program_offset(p); + op_split = &p->ops[pos_split]; /* refresh pointer */ + op_split->u.split.nongreedy = after_expr; + } else { + /* simple case, emit SAVE pair */ + if (!ensure_program_capacity(env->alloc, p, 2)) { + return false; + } + const uint32_t pos_start = reserve_program_opcode(p); + struct capvm_opcode *op = &p->ops[pos_start]; + op->t = CAPVM_OP_SAVE; /* save capture start */ + op->u.save = 2*id; + + const uint32_t pos_end = reserve_program_opcode(p); + op = &p->ops[pos_end]; + op->t = CAPVM_OP_SAVE; /* save capture end */ + op->u.save = 
2*group->u.group.id; + op->u.save = 2*id + 1; + } + } + + /* clear, because an ALT's subtrees can have distinct repeated groups */ + rgi->count = 0; + + return true; +} + +static bool +capvm_compile_iter_save_groups_in_skipped_subtree(struct capvm_compile_env *env, + struct capvm_program *p, const struct ast_expr *expr) +{ + /* Follow the subtree as far as any expressions that could + * contain GROUPs. Emit any empty groups. This is necessary for + * regexes like /()*^/ and /(x|(x|))^/ whose subtrees are + * otherwise pruned but would still match the empty string + * before ^. */ + switch (expr->type) { + case AST_EXPR_EMPTY: + case AST_EXPR_LITERAL: + case AST_EXPR_CODEPOINT: + case AST_EXPR_ANCHOR: + case AST_EXPR_SUBTRACT: + case AST_EXPR_RANGE: + case AST_EXPR_TOMBSTONE: + /* none of these can contain groups */ + break; + + case AST_EXPR_CONCAT: + if (expr->flags & AST_FLAG_UNSATISFIABLE) { + return true; /* skip */ + } + for (size_t i = 0; i < expr->u.concat.count; i++) { + if (!capvm_compile_iter_save_groups_in_skipped_subtree(env, p, expr->u.concat.n[i])) { + return false; + } + } + break; + case AST_EXPR_ALT: + for (size_t i = 0; i < expr->u.alt.count; i++) { + if (!capvm_compile_iter_save_groups_in_skipped_subtree(env, p, expr->u.alt.n[i])) { + return false; + } + } + break; + + case AST_EXPR_REPEAT: + return capvm_compile_iter_save_groups_in_skipped_subtree(env, p, expr->u.repeat.e); + + case AST_EXPR_GROUP: + { + const uint32_t id = expr->u.group.id; + LOG(5, "%s: recording otherwise skipped group %u\n", __func__, id); + + if (!ensure_program_capacity(env->alloc, p, 2)) { + return false; + } + + if (id > env->max_capture_seen || env->max_capture_seen == NO_CAPTURE_ID) { + env->max_capture_seen = id; + } + + const uint32_t pos_start = reserve_program_opcode(p); + struct capvm_opcode *op = &p->ops[pos_start]; + op->t = CAPVM_OP_SAVE; /* save capture start */ + op->u.save = 2*id; + + const uint32_t pos_end = reserve_program_opcode(p); + op = 
&p->ops[pos_end]; + op->t = CAPVM_OP_SAVE; /* save capture end */ + op->u.save = 2*id + 1; + + if (!capvm_compile_iter_save_groups_in_skipped_subtree(env, p, expr->u.group.e)) { + return false; + } + + break; + } + default: + assert(!"match fail"); + + } + return true; +} + +static enum re_capvm_compile_ast_res +capvm_compile(struct capvm_compile_env *env, + const struct ast *ast) +{ + struct capvm_program *p = f_calloc(env->alloc, 1, sizeof(*p)); + if (p == NULL) { + return RE_CAPVM_COMPILE_AST_ERROR_ALLOC; + } + + LOG(3, "%s: has_unanchored: start? %d, end? %d\n", __func__, + ast->has_unanchored_start, + ast->has_unanchored_end); + + /* If the regex has an unanchored start, it gets a `.*` prefix, + * but with the labels swapped so that the unanchored start + * loop is NOT greedy. */ + if (ast->has_unanchored_start) { + if (!ensure_program_capacity(env->alloc, p, 4)) { + return RE_CAPVM_COMPILE_AST_ERROR_ALLOC; + } + + /* l1: split l3, l2 + * l2: . + * jmp l1 + * l3: */ + const uint32_t l1 = get_program_offset(p); + const uint32_t split_pos = reserve_program_opcode(p); + struct capvm_opcode *op_split = &p->ops[split_pos]; + + const uint32_t l2 = get_program_offset(p); + const uint32_t op_cc_pos = reserve_program_opcode(p); + struct capvm_opcode *op_cc = &p->ops[op_cc_pos]; + + const uint32_t op_jmp_pos = reserve_program_opcode(p); + struct capvm_opcode *op_jmp = &p->ops[op_jmp_pos]; + + const uint32_t l3 = get_program_offset(p); + + op_split->t = CAPVM_OP_SPLIT; + op_split->u.split.greedy = l3; + op_split->u.split.nongreedy = l2; + + op_cc->t = CAPVM_OP_CHARCLASS; + uint64_t any[4]; + for (size_t i = 0; i < 4; i++) { + any[i] = ~(uint64_t)0; + } + if (!intern_char_class(env, p, any, &op_cc->u.charclass_id)) { + goto cleanup; + } + + op_jmp->t = CAPVM_OP_JMP; + op_jmp->u.jmp = l1; + } + + /* Compile the regex AST, assuming match group 0 is + * explicitly represented. 
*/ + if (!capvm_compile_iter(env, p, ast->expr)) { + goto cleanup; + } + + /* Add the unanchored end loop, outside of match group 0 */ + if (ast->has_unanchored_end) { + if (!ensure_program_capacity(env->alloc, p, 4)) { + return RE_CAPVM_COMPILE_AST_ERROR_ALLOC; + } + + /* l1: split l3, l2 + * l2: . + * jmp l1 + * l3: */ + const uint32_t l1 = reserve_program_opcode(p); + const uint32_t l2 = reserve_program_opcode(p); + const uint32_t l_jmp = reserve_program_opcode(p); + const uint32_t l3 = get_program_offset(p); + + struct capvm_opcode *op_split = &p->ops[l1]; + + struct capvm_opcode *op_any = &p->ops[l2]; + struct capvm_opcode *op_jmp = &p->ops[l_jmp]; + + op_split->t = CAPVM_OP_SPLIT; + op_split->u.split.greedy = l3; + op_split->u.split.nongreedy = l2; + + op_any->t = CAPVM_OP_CHARCLASS; + uint64_t any[4]; + for (size_t i = 0; i < 4; i++) { + any[i] = ~(uint64_t)0; + } + if (!intern_char_class(env, p, any, &op_any->u.charclass_id)) { + goto cleanup; + } + + op_jmp->t = CAPVM_OP_JMP; + op_jmp->u.jmp = l1; + } + + /* add MATCH opcode at end */ + if (!ensure_program_capacity(env->alloc, p, 1)) { + return RE_CAPVM_COMPILE_AST_ERROR_ALLOC; + } + const uint32_t pos_m = reserve_program_opcode(p); + struct capvm_opcode *op_m = &p->ops[pos_m]; + op_m->t = CAPVM_OP_MATCH; + + /* TODO: populate info about max threads, etc. in p, + * because it should be possible to calculate runtime + * memory limits at compile time. */ + env->program = p; + p->capture_count = (env->max_capture_seen == NO_CAPTURE_ID + ? 
0 : env->max_capture_seen + 1); + + if (LOG_CAPVM > 2) { + LOG(0, "====\n"); + fsm_capvm_program_dump(stderr, p); + LOG(0, "====\n"); + } + + /* TODO: it may be worth exposing these static checks as + * something the caller can run at load-time */ + check_program_for_invalid_labels(p); + + return RE_CAPVM_COMPILE_AST_OK; + +cleanup: + fsm_capvm_program_free(env->alloc, p); + return RE_CAPVM_COMPILE_AST_ERROR_ALLOC; +} + +#define DUMP_AST 0 +#define DUMP_RESULT 0 /* should be 0 in production */ + +#if DUMP_AST || DUMP_RESULT +#include +#include "print.h" +static struct fsm_options opt = { .group_edges = 1 }; + +static unsigned +get_max_capture_id(const struct capvm_program *program) +{ + assert(program != NULL); + return (program->capture_count == 0 + ? 0 + : program->capture_base + program->capture_count - 1); +} + +#endif + +enum re_capvm_compile_ast_res +re_capvm_compile_ast(const struct fsm_alloc *alloc, + const struct ast *ast, + enum re_flags re_flags, + struct capvm_program **program) +{ +#if DUMP_AST + if (LOG_CAPVM > 2) { + ast_print_dot(stderr, &opt, re_flags, ast); + ast_print_tree(stderr, &opt, re_flags, ast); + } +#endif + + struct capvm_compile_env env = { + .alloc = alloc, + .re_flags = re_flags, + .max_capture_seen = NO_CAPTURE_ID, + }; + + enum re_capvm_compile_ast_res res; + res = capvm_compile(&env, ast); + + + struct repeated_group_info *rgi = env.repeated_groups; + while (rgi != NULL) { + struct repeated_group_info *prev = rgi->prev; + LOG(3 - LOG_REPETITION_CASES, + "%s: rgi cleanup, freeing %p, prev %p\n", + __func__, (void *)rgi, (void *)prev); + + if (rgi->groups != NULL) { + f_free(alloc, rgi->groups); + } + f_free(alloc, rgi); + rgi = prev; + } + + if (res == RE_CAPVM_COMPILE_AST_OK) { +#if DUMP_RESULT > 0 + if (DUMP_RESULT > 1 || getenv("DUMP")) { + ast_print_tree(stderr, &opt, re_flags, ast); + fsm_capvm_program_dump(stderr, env.program); + fprintf(stderr, "%s: max_capture_id %u\n", __func__, + get_max_capture_id(env.program)); + + } 
+#endif + + *program = env.program; + } + + free(env.charclass_htab.buckets); + + return res; +} diff --git a/src/libre/re_capvm_compile.h b/src/libre/re_capvm_compile.h new file mode 100644 index 000000000..b98ac9196 --- /dev/null +++ b/src/libre/re_capvm_compile.h @@ -0,0 +1,32 @@ +/* + * Copyright 2022 Scott Vokes + * + * See LICENCE for the full copyright terms. + */ + +#ifndef RE_CAPVM_COMPILE_H +#define RE_CAPVM_COMPILE_H + +/* The part of the capture VM interface that belongs in + * libre rather than libfsm, mostly related to compiling + * a libre AST into a capvm_program. */ + +#include + +#include "ast.h" +#include + +struct capvm_program; + +enum re_capvm_compile_ast_res { + RE_CAPVM_COMPILE_AST_OK, + RE_CAPVM_COMPILE_AST_ERROR_ALLOC = -1, +}; + +enum re_capvm_compile_ast_res +re_capvm_compile_ast(const struct fsm_alloc *alloc, + const struct ast *ast, + enum re_flags re_flags, + struct capvm_program **program); + +#endif diff --git a/src/libre/strerror.c b/src/libre/strerror.c index 009d61df2..d66e750a4 100644 --- a/src/libre/strerror.c +++ b/src/libre/strerror.c @@ -20,6 +20,8 @@ re_strerror(enum re_errno e) case RE_EERRNO: return strerror(errno); case RE_EBADDIALECT: return "Bad dialect"; case RE_EBADGROUP: return "Bad group"; + case RE_EUNSUPCAPTUR: return "Cannot support captures in this case"; + case RE_EUNSUPPPCRE: return "Unsupported PCRE edge case"; case RE_ENEGRANGE: return "Negative group range"; case RE_ENEGCOUNT: return "Negative count range"; diff --git a/src/lx/parser.act b/src/lx/parser.act index 78254e123..66d0591f0 100644 --- a/src/lx/parser.act +++ b/src/lx/parser.act @@ -388,7 +388,7 @@ assert(@a != NULL); assert(@a->p != NULL); - @r = re_comp(RE_NATIVE, act_agetc, @a, act_state->opt, @f, &err); + @r = re_comp(RE_NATIVE, act_agetc, @a, act_state->opt, @f | RE_NOCAPTURE, &err); if (@r == NULL) { assert(err.e != RE_EBADDIALECT); /* TODO: pass filename for .lx source */ diff --git a/src/re/main.c b/src/re/main.c index 77f147518..6e44d675a 
100644 --- a/src/re/main.c +++ b/src/re/main.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -48,6 +49,10 @@ * TODO: flags; -r for RE_REVERSE, etc */ +static int +exec_with_captures(struct fsm *fsm, + int (*fsm_getc)(void *opaque), void *opaque, fsm_state_t *end); + struct match { fsm_end_id_t i; const char *s; @@ -613,6 +618,10 @@ parse_flags(const char *arg, enum re_flags *flags) *flags = *flags | RE_EXTENDED; break; + case 'C': + *flags = *flags | RE_NOCAPTURE; + break; + /* others? */ default: @@ -642,6 +651,7 @@ main(int argc, char *argv[]) int patterns; int ambig; int makevm; + int resolve_captures; size_t generate_bounds = 0; struct fsm_dfavm *vm; @@ -665,6 +675,7 @@ main(int argc, char *argv[]) patterns = 0; ambig = 0; makevm = 0; + resolve_captures = 0; print_fsm = NULL; print_ast = NULL; query = NULL; @@ -675,7 +686,7 @@ main(int argc, char *argv[]) { int c; - while (c = getopt(argc, argv, "h" "acCwXe:E:G:k:" "bi" "sq:r:l:F:" "upMmnftxyz"), c != -1) { + while (c = getopt(argc, argv, "h" "acCwXe:E:G:k:" "bi" "sq:r:l:F:" "upMmnRftxyz"), c != -1) { switch (c) { case 'a': opt.anonymous_states = 0; break; case 'c': opt.consolidate_edges = 0; break; @@ -714,6 +725,7 @@ main(int argc, char *argv[]) case 't': isliteral = 1; break; case 'z': patterns = 1; break; case 'M': makevm = 1; break; + case 'R': resolve_captures = 1; break; case 'G': generate_bounds = strtoul(optarg, NULL, 10); @@ -1242,8 +1254,10 @@ main(int argc, char *argv[]) if (vm != NULL) { e = fsm_vm_match_file(vm, f); + } else if (resolve_captures) { + assert(!"todo"); } else { - e = fsm_exec(fsm, fsm_fgetc, f, &state, NULL); + e = fsm_exec(fsm, fsm_fgetc, f, &state); } fclose(f); @@ -1254,8 +1268,10 @@ main(int argc, char *argv[]) if (vm != NULL) { e = fsm_vm_match_buffer(vm, s, strlen(s)); + } else if (resolve_captures) { + e = exec_with_captures(fsm, fsm_sgetc, &s, &state); } else { - e = fsm_exec(fsm, fsm_sgetc, &s, &state, NULL); + e = fsm_exec(fsm, fsm_sgetc, &s, 
&state); } } @@ -1288,3 +1304,56 @@ main(int argc, char *argv[]) return r; } } + +static int +exec_with_captures(struct fsm *fsm, + int (*fsm_getc)(void *opaque), void *opaque, fsm_state_t *end) +{ + int c; + size_t ceil = 16; + size_t used = 0; + unsigned char *buf = malloc(ceil); + size_t i; + size_t capture_ceil; + struct fsm_capture *captures; + int res; + + while (c = fsm_getc(opaque), c != EOF) { + if (used == ceil - 1) { + const size_t nceil = 2*ceil; + unsigned char *nbuf = realloc(buf, nceil); + if (nbuf == NULL) { + free(buf); + return -1; + } + ceil = nceil; + buf = nbuf; + } + buf[used] = c; + used++; + } + buf[used] = '\0'; + + capture_ceil = fsm_capture_ceiling(fsm); + + captures = malloc(capture_ceil * sizeof(captures[0])); + if (captures == NULL) { + free(buf); + return -1; + } + + res = fsm_exec_with_captures(fsm, buf, used, + end, captures, capture_ceil); + if (res == 1) { + for (i = 0; i < capture_ceil; i++) { + printf("-- %zu: %zd,%zd\n", + i, captures[i].pos[0], captures[i].pos[1]); + } + } else { + printf("-- no match\n"); + } + + free(buf); + free(captures); + return res; +} diff --git a/src/retest/main.c b/src/retest/main.c index b6b4c52f7..e01c93e7c 100644 --- a/src/retest/main.c +++ b/src/retest/main.c @@ -393,7 +393,7 @@ parse_escapes(char *s, char **errpos, int *lenp) ndig++; } else { - s[j++] = ccode; + s[j++] = (char)ccode; st = ST_BARE; if (!hexcurly) { diff --git a/tests/aho_corasick/Makefile b/tests/aho_corasick/Makefile index 5748ddd5c..6eef421bf 100644 --- a/tests/aho_corasick/Makefile +++ b/tests/aho_corasick/Makefile @@ -33,7 +33,7 @@ ${TEST_OUTDIR.tests/aho_corasick}/out${n}a.re: ${TEST_SRCDIR.tests/aho_corasick} < ${.ALLSRC:M*.txt} > $@ ${TEST_OUTDIR.tests/aho_corasick}/out${n}a.fsm: ${RE} ${TEST_OUTDIR.tests/aho_corasick}/out${n}a.re - ${RE} -r native -py ${.ALLSRC:M*.re} \ + ${RE} -FC -r native -py ${.ALLSRC:M*.re} \ > $@ ${TEST_OUTDIR.tests/aho_corasick}/got${n}a.fsm: ${AC_TEST} ${TEST_SRCDIR.tests/aho_corasick}/in${n}.txt 
@@ -48,7 +48,7 @@ ${TEST_OUTDIR.tests/aho_corasick}/out${n}l.re: ${TEST_SRCDIR.tests/aho_corasick} < ${.ALLSRC:M*.txt} > $@ ${TEST_OUTDIR.tests/aho_corasick}/out${n}l.fsm: ${RE} ${TEST_OUTDIR.tests/aho_corasick}/out${n}l.re - ${RE} -r native -py ${.ALLSRC:M*.re} \ + ${RE} -FC -r native -py ${.ALLSRC:M*.re} \ > $@ ${TEST_OUTDIR.tests/aho_corasick}/got${n}l.fsm: ${AC_TEST} ${TEST_SRCDIR.tests/aho_corasick}/in${n}.txt @@ -63,7 +63,7 @@ ${TEST_OUTDIR.tests/aho_corasick}/out${n}r.re: ${TEST_SRCDIR.tests/aho_corasick} < ${.ALLSRC:M*.txt} > $@ ${TEST_OUTDIR.tests/aho_corasick}/out${n}r.fsm: ${RE} ${TEST_OUTDIR.tests/aho_corasick}/out${n}r.re - ${RE} -r native -py ${.ALLSRC:M*.re} \ + ${RE} -FC -r native -py ${.ALLSRC:M*.re} \ > $@ ${TEST_OUTDIR.tests/aho_corasick}/got${n}r.fsm: ${AC_TEST} ${TEST_SRCDIR.tests/aho_corasick}/in${n}.txt @@ -79,7 +79,7 @@ ${TEST_OUTDIR.tests/aho_corasick}/out${n}u.re: ${TEST_SRCDIR.tests/aho_corasick} < ${.ALLSRC:M*.txt} > $@ ${TEST_OUTDIR.tests/aho_corasick}/out${n}u.fsm: ${RE} ${TEST_OUTDIR.tests/aho_corasick}/out${n}u.re - ${RE} -r native -py ${.ALLSRC:M*.re} \ + ${RE} -FC -r native -py ${.ALLSRC:M*.re} \ > $@ ${TEST_OUTDIR.tests/aho_corasick}/got${n}u.fsm: ${AC_TEST} ${TEST_SRCDIR.tests/aho_corasick}/in${n}.txt diff --git a/tests/capture/Makefile b/tests/capture/Makefile index 53d63ff2b..bdee42df4 100644 --- a/tests/capture/Makefile +++ b/tests/capture/Makefile @@ -7,18 +7,20 @@ TEST_OUTDIR.tests/capture = ${BUILD}/tests/capture .for n in ${TEST.tests/capture:T:R:C/^capture//} test:: ${TEST_OUTDIR.tests/capture}/res${n} SRC += ${TEST_SRCDIR.tests/capture}/capture${n}.c -CFLAGS.${TEST_SRCDIR.tests/capture}/capture${n}.c = -UNDEBUG +CFLAGS.${TEST_SRCDIR.tests/capture}/capture${n}.c = -UNDEBUG -std=c99 ${TEST_OUTDIR.tests/capture}/run${n}: ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o - ${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/capture}/run${n} ${TEST_OUTDIR.tests/capture}/capture${n}.o 
${TEST_OUTDIR.tests/capture}/captest.o ${BUILD}/lib/libfsm.a + ${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/capture}/run${n} ${TEST_OUTDIR.tests/capture}/capture${n}.o ${TEST_OUTDIR.tests/capture}/captest.o ${BUILD}/lib/libfsm.a ${BUILD}/lib/libre.a ${TEST_OUTDIR.tests/capture}/res${n}: ${TEST_OUTDIR.tests/capture}/run${n} ( ${TEST_OUTDIR.tests/capture}/run${n} 1>&2 && echo PASS || echo FAIL ) > ${TEST_OUTDIR.tests/capture}/res${n} -.for lib in ${LIB:Mlibfsm} +.for lib in ${LIB:Mlibfsm} ${LIB:Mlibre} ${TEST_OUTDIR.tests/capture}/run${n}: ${BUILD}/lib/${lib:R}.a .endfor .endfor +CFLAGS.${TEST_SRCDIR.tests/capture}/captest.c += -std=c99 + ${TEST_OUTDIR.tests/capture}/captest.o: tests/capture/captest.c ${CC} ${CFLAGS} -c -o ${TEST_OUTDIR.tests/capture}/captest.o tests/capture/captest.c diff --git a/tests/capture/captest.c b/tests/capture/captest.c index dc66f81d4..bf453e3ad 100644 --- a/tests/capture/captest.c +++ b/tests/capture/captest.c @@ -1,19 +1,19 @@ #include "captest.h" -#include -#include -#include - -#if CAPTEST_RUN_SINGLE_LOG #include -#endif +#include + +#include + +/* for fsm_capvm_program_exec */ +#include "../../src/libfsm/capture_vm.h" -#define FAIL(MSG) \ - fprintf(stderr, "FAIL: %s:%d -- %s\n", \ - __FILE__, __LINE__, MSG); \ - exit(EXIT_FAILURE) +struct captest_input { + const char *string; + size_t pos; +}; -int +static int captest_getc(void *opaque) { struct captest_input *input = opaque; @@ -22,189 +22,483 @@ captest_getc(void *opaque) return res == 0 ? 
EOF : res; } -int -captest_run_single(const struct captest_single_fsm_test_info *info) -{ - size_t i; - struct captest_input input; - fsm_state_t end; - int exec_res; - struct fsm_capture got_captures[MAX_TEST_CAPTURES]; - struct fsm_capture exp_captures[MAX_TEST_CAPTURES]; - size_t capture_count = 0; - struct fsm *fsm = captest_fsm_of_string(info->string, 0); +static struct fsm_options options = { + .group_edges = 1, +}; - input.string = info->string; - input.pos = 0; +#define MAX_INPUT_WITH_NL_LENGTH 1000 +static char +input_with_nl[MAX_INPUT_WITH_NL_LENGTH]; - if (fsm == NULL) { - FAIL("fsm_of_string"); +enum captest_run_case_res +captest_run_case(const struct captest_case_single *testcase, + int verbosity, bool trailing_newline, FILE *prog_output) +{ + bool dump_captures = false; + enum captest_run_case_res res = CAPTEST_RUN_CASE_ERROR; + struct re_err err; + + if (verbosity == DUMP_PROGRAMS_VERBOSITY) { + assert(prog_output != NULL); + } else { + assert(prog_output == NULL); } - for (i = 0; i < MAX_TEST_CAPTURES; i++) { - exp_captures[i].pos[0] = FSM_CAPTURE_NO_POS; - exp_captures[i].pos[1] = FSM_CAPTURE_NO_POS; + if (verbosity > 0) { + printf("/%s/ <- \"%s%s\": ", + testcase->regex, testcase->input, + trailing_newline ? 
"\\n" : ""); } - for (i = 0; i < MAX_SINGLE_FSM_TEST_PATHS; i++) { - const struct captest_single_fsm_test_path *path = - &info->paths[i]; - if (path->start == 0 && path->end == 0 && i > 0) { - break; /* end of list */ - } + /* build regex */ + const enum re_flags flags = 0; + struct captest_input comp_input = { + .string = testcase->regex, + }; - /* no zero-width captures */ - assert(path->end > path->start); + struct fsm *fsm = re_comp(RE_PCRE, + captest_getc, &comp_input, + &options, flags, &err); - if (!fsm_capture_set_path(fsm, i, - path->start, path->end)) { - fprintf(stderr, - "failed to set capture path %lu\n", i); - FAIL("fsm_capture_set_path"); + if (testcase->match == SHOULD_REJECT_AS_UNSUPPORTED) { + if (fsm != NULL) { + fsm_free(fsm); + return CAPTEST_RUN_CASE_FAIL; } + return CAPTEST_RUN_CASE_PASS; + } - exp_captures[i].pos[0] = path->start; - exp_captures[i].pos[1] = path->end; + assert(fsm != NULL); - capture_count = i + 1; + if (!fsm_determinise(fsm)) { + return CAPTEST_RUN_CASE_ERROR; } - { - const unsigned count = fsm_countcaptures(fsm); - const unsigned expected = capture_count; - if (count != expected) { - fprintf(stderr, "expected %u, got %u\n", - expected, count); - FAIL("countcaptures"); - } + if (!fsm_minimise(fsm)) { + return CAPTEST_RUN_CASE_ERROR; } -#if CAPTEST_RUN_SINGLE_LOG - fsm_print_fsm(stderr, fsm); - fsm_capture_dump(stderr, "fsm", fsm); -#endif + if (verbosity > 3) { + fsm_print_fsm(stdout, fsm); + } - exec_res = fsm_exec(fsm, captest_getc, &input, &end, got_captures); - if (exec_res != 1) { FAIL("exec_res"); } - if (end != strlen(info->string)) { FAIL("exec end pos"); } + if (trailing_newline) { + const size_t length = strlen(testcase->input); + assert(length + 1 < MAX_INPUT_WITH_NL_LENGTH); + memcpy(input_with_nl, testcase->input, + length); + input_with_nl[length] = '\n'; + input_with_nl[length + 1] = '\0'; + } - { - fsm_end_id_t id_buf[1] = { ~0 }; - enum fsm_getendids_res gres; - size_t written; - if (1 != 
fsm_getendidcount(fsm, end)) { - FAIL("did not have exactly one end ID"); + const char *input = trailing_newline + ? input_with_nl + : testcase->input; + assert(input != NULL); + const size_t length = strlen(input); + + fsm_state_t end; /* unused but required by API */ + struct fsm_capture capture_buf[MAX_CAPTEST_SINGLE_CAPTURE_PAIRS]; + const size_t capture_buf_length = MAX_CAPTEST_SINGLE_CAPTURE_PAIRS; + + /* Initialize with values that are distinct from FSM_CAPTURE_NO_POS + * and will stand out visually. Should never see these. */ + for (size_t i = 0; i < MAX_CAPTEST_SINGLE_CAPTURE_PAIRS; i++) { + capture_buf[i].pos[0] = 88888888; + capture_buf[i].pos[1] = 99999999; + } + + /* If verbosity is exactly DUMP_PROGRAMS_VERBOSITY, then print out capture info and pass. */ + if (verbosity == DUMP_PROGRAMS_VERBOSITY) { + assert(prog_output != NULL); + if (!trailing_newline) { + const char *match_str = testcase->match == SHOULD_MATCH ? "SHOULD_MATCH" + : testcase->match == SHOULD_NOT_MATCH ? "SHOULD_NOT_MATCH" + : testcase->match == SHOULD_REJECT_AS_UNSUPPORTED ? 
"SHOULD_REJECT_AS_UNSUPPORTED" + : "ERROR"; + fprintf(prog_output, "regex \"%s\", input \"%s\", match %s, no_nl %d, count %zu:", + testcase->regex, testcase->input, match_str, testcase->no_nl, + testcase->count); + for (size_t i = 0; i < testcase->count; i++) { + fprintf(prog_output, " %zu:[%zd, %zd]", + i, testcase->expected[i].pos[0], testcase->expected[i].pos[1]); + } + fprintf(prog_output, "\n"); + fsm_capture_dump(prog_output, "capture_info", fsm); } + fsm_free(fsm); + return CAPTEST_RUN_CASE_PASS; + } + + /* first, execute with a capture buffer that is one cell too small and check for an error */ + const size_t capture_ceil = fsm_capture_ceiling(fsm); + assert(capture_ceil > 0); + const size_t insufficient_capture_buf_length = capture_ceil - 1; + errno = 0; + int exec_res = fsm_exec_with_captures(fsm, + (const unsigned char *)input, length, &end, capture_buf, insufficient_capture_buf_length); + assert(exec_res == -1); + assert(errno == EINVAL); + errno = 0; + + /* then, execute and check result & captures */ + exec_res = fsm_exec_with_captures(fsm, + (const unsigned char *)input, length, &end, capture_buf, capture_buf_length); + if (exec_res == -1) { + perror("fsm_exec_with_captures"); + return CAPTEST_RUN_CASE_ERROR; + } - gres = fsm_getendids(fsm, end, 1, id_buf, &written); - if (gres != FSM_GETENDIDS_FOUND) { - FAIL("failed to get end IDs"); + if (testcase->match == SHOULD_NOT_MATCH) { /* expect match failure */ + res = (exec_res == 0 + ? 
CAPTEST_RUN_CASE_PASS + : CAPTEST_RUN_CASE_FAIL); + } else if (exec_res == 0) { + res = CAPTEST_RUN_CASE_FAIL; /* didn't match, should have */ + } else { + res = CAPTEST_RUN_CASE_PASS; + if (verbosity > 1) { + dump_captures = true; } - if (0 != id_buf[0]) { - FAIL("failed to get end ID of 0"); + /* check captures against expected */ + for (size_t i = 0; i < testcase->count; i++) { + if (testcase->expected[i].pos[0] != capture_buf[i].pos[0] || + testcase->expected[i].pos[1] != capture_buf[i].pos[1]) { + res = CAPTEST_RUN_CASE_FAIL; + dump_captures = true; + } } } - for (i = 0; i < capture_count; i++) { -#if CAPTEST_RUN_SINGLE_LOG - fprintf(stderr, "captest: capture %lu: exp (%ld, %ld), got (%ld, %ld)\n", - i, exp_captures[i].pos[0], exp_captures[i].pos[1], - got_captures[i].pos[0], got_captures[i].pos[1]); -#endif - if (got_captures[i].pos[0] != exp_captures[i].pos[0]) { - fprintf(stderr, "capture[%lu].pos[0]: exp %lu, got %lu\n", - i, exp_captures[i].pos[0], - got_captures[i].pos[0]); - FAIL("capture mismatch"); + switch (res) { + case CAPTEST_RUN_CASE_PASS: + if (verbosity > 0) { + printf("pass\n"); } - if (got_captures[i].pos[1] != exp_captures[i].pos[1]) { - fprintf(stderr, "capture[%lu].pos[1]: exp %lu, got %lu\n", - i, exp_captures[i].pos[1], - got_captures[i].pos[1]); - FAIL("capture mismatch"); + break; + case CAPTEST_RUN_CASE_FAIL: + if (verbosity == 0) { + printf("/%s/ <- \"%s%s\": FAIL\n", + testcase->regex, testcase->input, + trailing_newline ? "\\n" : ""); + } + if (verbosity > 0) { + printf("FAIL\n"); + } + break; + case CAPTEST_RUN_CASE_ERROR: + printf("ERROR\n"); + break; + } + + if (dump_captures) { + for (size_t i = 0; i < testcase->count; i++) { + printf("exp %zd, %zd, got %zd, %zd%s\n", + testcase->expected[i].pos[0], testcase->expected[i].pos[1], + capture_buf[i].pos[0], capture_buf[i].pos[1], + (testcase->expected[i].pos[0] != capture_buf[i].pos[0] || + testcase->expected[i].pos[1] != capture_buf[i].pos[1]) + ? 
" *" : ""); } } fsm_free(fsm); - return 0; + return res; } -struct fsm * -captest_fsm_of_string(const char *string, unsigned end_id) +enum captest_run_case_res +captest_run_case_multi(const struct captest_case_multi *testcase, + int verbosity, bool trailing_newline, FILE *prog_output, + struct captest_case_multi_result *result) { - struct fsm *fsm = captest_fsm_with_options(); - const size_t length = strlen(string); - size_t i; + struct re_err err; + const enum re_flags flags = 0; - if (fsm == NULL) { - return NULL; + struct captest_case_multi_result ignored_result; + if (result == NULL) { + result = &ignored_result; } + memset(result, 0x00, sizeof(*result)); - if (!fsm_addstate_bulk(fsm, length + 1)) { - goto cleanup; + if (verbosity == DUMP_PROGRAMS_VERBOSITY) { + assert(prog_output != NULL); + } else { + assert(prog_output == NULL); } - fsm_setstart(fsm, 0); - for (i = 0; i < length; i++) { - if (!fsm_addedge_literal(fsm, i, i + 1, string[i])) { + /* build each regex, combining them and keeping track of capture offsets */ + struct fsm *fsms[testcase->regex_count]; + struct fsm_combined_base_pair bases[testcase->regex_count]; + struct fsm *combined_fsm = NULL; + + for (size_t i = 0; i < testcase->regex_count; i++) { + fsms[i] = NULL; + } + + /* compile each individually */ + for (size_t i = 0; i < testcase->regex_count; i++) { + struct captest_input comp_input = { + .string = testcase->regexes[i], + }; + + if (verbosity > 1) { + fprintf(stderr, "%s: compiling \"%s\"\n", + __func__, comp_input.string); + } + + struct fsm *fsm = re_comp(RE_PCRE, + captest_getc, &comp_input, + &options, flags, &err); + assert(fsm != NULL); + + if (!fsm_determinise(fsm)) { goto cleanup; } + + if (!fsm_minimise(fsm)) { + goto cleanup; + } + + if (verbosity > 3) { + char tag_buf[16] = { 0 }; + snprintf(tag_buf, sizeof(tag_buf), "fsm[%zu]", i); + + fprintf(stderr, "==== fsm[%zu]\n", i); + fsm_print_fsm(stderr, fsm); + fsm_capture_dump(stderr, tag_buf, fsm); + } + + fsms[i] = fsm; + } + 
+ combined_fsm = fsm_union_array(testcase->regex_count, fsms, bases); + assert(combined_fsm != NULL); + if (verbosity > 0) { + fprintf(stderr, "%s: combined_fsm: %d states after fsm_union_array\n", + __func__, fsm_countstates(combined_fsm)); + } + if (verbosity > 1) { + for (size_t i = 0; i < testcase->regex_count; i++) { + fprintf(stderr, "%s: base[%zu]: state %d, capture %u\n", + __func__, i, bases[i].state, bases[i].capture); + } } - fsm_setend(fsm, length, 1); - if (!fsm_setendid(fsm, end_id)) { + if (!fsm_determinise(combined_fsm)) { goto cleanup; } + if (verbosity > 0) { + fprintf(stderr, "%s: combined_fsm: %d states after determinise\n", + __func__, fsm_countstates(combined_fsm)); + } + + if (!fsm_minimise(combined_fsm)) { + goto cleanup; + } + if (verbosity > 0) { + fprintf(stderr, "%s: combined_fsm: %d states after minimise\n", + __func__, fsm_countstates(combined_fsm)); + } + + /* If verbosity is exactly 9, then print out capture info and pass. */ + if (verbosity == DUMP_PROGRAMS_VERBOSITY) { + fsm_capture_dump(prog_output, "capture_info", combined_fsm); + fsm_free(combined_fsm); + return CAPTEST_RUN_CASE_PASS; + } + + if (verbosity > 3) { + fprintf(stderr, "==== combined\n"); + fsm_print_fsm(stderr, combined_fsm); + fsm_capture_dump(stderr, "combined", combined_fsm); + } + + /* for each input, execute and check result */ + const struct multi_case_input_info *info; + for (info = &testcase->inputs[0]; info->input != NULL; info++) { + if (trailing_newline) { + const size_t length = strlen(info->input); + assert(length + 1 < MAX_INPUT_WITH_NL_LENGTH); + memcpy(input_with_nl, info->input, + length); + input_with_nl[length] = '\n'; + input_with_nl[length + 1] = '\0'; + } + + const char *input = trailing_newline + ? 
input_with_nl + : info->input; + assert(input != NULL); + const size_t length = strlen(input); + + if (verbosity > 1) { + fprintf(stderr, "%s: input: %s\n", __func__, input); + } + + fsm_state_t end; /* unused but required by API */ + struct fsm_capture capture_buf[MAX_CAPTEST_MULTI_CAPTURE_PAIRS]; + const size_t capture_buf_length = MAX_CAPTEST_MULTI_CAPTURE_PAIRS; + for (size_t i = 0; i < capture_buf_length; i++) { + capture_buf[i].pos[0] = (size_t)-2; + capture_buf[i].pos[1] = (size_t)-3; + } + + /* execute and check result & captures */ + int exec_res = fsm_exec_with_captures(combined_fsm, + (const unsigned char *)input, length, &end, capture_buf, capture_buf_length); + if (exec_res == -1) { + perror("fsm_exec_with_captures"); + return CAPTEST_RUN_CASE_ERROR; + } + + /* The .regex field should be in ascending order so we know + * when we've reached the all-0 suffix of expected[]. */ + uint8_t prev_regex = 0; + for (const struct case_multi_expected *exp = &info->expected[0]; + exp->regex >= prev_regex; exp++) { + prev_regex = exp->regex; + bool match = true; + const unsigned capture_base = bases[exp->regex].capture; + const unsigned capture_id = capture_base + exp->capture; + assert(capture_id < MAX_CAPTEST_MULTI_CAPTURE_PAIRS); + const size_t exp_s = exp->pos[0]; + const size_t exp_e = exp->pos[1]; + const size_t got_s = capture_buf[capture_id].pos[0]; + const size_t got_e = capture_buf[capture_id].pos[1]; + if (exp_s == got_s && exp_e == got_e) { + result->pass++; + } else { + match = false; + result->fail++; + } + + if (!match || verbosity > 2) { + fprintf(stderr, "%s: regex %u, capture %u (%u + base %u), exp (%zd, %zd), got (%zd, %zd)%s\n", + __func__, exp->regex, + capture_id, exp->capture, capture_base, + exp_s, exp_e, got_s, got_e, + match ? "" : " *** mismatch ***"); + } + } + } + + fsm_free(combined_fsm); - return fsm; + /* this could populate a result struct so it can pass/fail multiple inputs */ + + return result->fail == 0 + ? 
CAPTEST_RUN_CASE_PASS + : CAPTEST_RUN_CASE_FAIL; cleanup: - fsm_free(fsm); - return NULL; -} + if (combined_fsm != NULL) { + fsm_free(combined_fsm); + } else { + for (size_t i = 0; i < testcase->regex_count; i++) { + if (fsms[i] != NULL) { + fsm_free(fsms[i]); + } + } + } -static struct fsm_options options; + return CAPTEST_RUN_CASE_ERROR; +} -struct fsm * -captest_fsm_with_options(void) +static struct capvm_program * +get_program_copy(const struct captest_case_program *testcase) { - struct fsm *fsm = NULL; + static struct capvm_program prog; + static struct capvm_opcode ops[MAX_PROGRAM_OPS + 1] = { 0 }; + static struct capvm_char_class cc_sets[MAX_PROGRAM_CHAR_CLASSES] = { 0 }; + + memset(&prog, 0x00, sizeof(prog)); + + memcpy(ops, testcase->ops, + MAX_PROGRAM_OPS * sizeof(testcase->ops[0])); + memcpy(cc_sets, testcase->char_class, + MAX_PROGRAM_CHAR_CLASSES * sizeof(testcase->char_class[0])); + + assert(testcase->expected.count < MAX_CAPTEST_PROGRAM_CAPTURE_PAIRS); + prog.capture_count = testcase->expected.count; + prog.capture_base = testcase->expected.base; + + uint32_t max_cc_used = (uint32_t)-1; + + prog.used = MAX_PROGRAM_OPS; + for (size_t i = 0; i < MAX_PROGRAM_OPS; i++) { + const struct capvm_opcode *op = &testcase->ops[i]; + if (op->t == CAPVM_OP_CHAR && op->u.chr == 0x00) { + prog.used = i; + break; + } else if (op->t == CAPVM_OP_CHARCLASS) { + if (max_cc_used == (uint32_t)-1 || op->u.charclass_id > max_cc_used) { + assert(op->u.charclass_id < MAX_PROGRAM_CHAR_CLASSES); + max_cc_used = op->u.charclass_id; + } + } + } + + prog.ceil = MAX_PROGRAM_OPS; + prog.ops = ops; + + prog.char_classes.sets = cc_sets; + prog.char_classes.count = max_cc_used == (uint32_t)-1 ? 0 : max_cc_used + 1; + prog.char_classes.ceil = MAX_PROGRAM_CHAR_CLASSES; - /* We currently don't need to set anything custom on this. 
*/ - fsm = fsm_new(&options); - return fsm; + return &prog; } -int -captest_check_single_end_id(const struct fsm *fsm, fsm_state_t end_state, - unsigned expected_end_id, const char **msg) +enum captest_run_case_res +captest_run_case_program(const struct captest_case_program *testcase, + int verbosity) { - fsm_end_id_t id_buf[1] = { ~0 }; - enum fsm_getendids_res gres; - size_t written; - const char *unused; + (void)verbosity; - if (msg == NULL) { - msg = &unused; + /* copy program */ + const size_t input_length = strlen(testcase->input); + struct fsm_capture capture_buf[MAX_CAPTEST_PROGRAM_CAPTURE_PAIRS]; + const size_t capture_buf_length = MAX_CAPTEST_PROGRAM_CAPTURE_PAIRS; + + /* Initialize with FSM_CAPTURE_NO_POS, as the caller would */ + for (size_t i = 0; i < capture_buf_length; i++) { + capture_buf[i].pos[0] = FSM_CAPTURE_NO_POS; + capture_buf[i].pos[1] = FSM_CAPTURE_NO_POS; } - if (1 != fsm_getendidcount(fsm, end_state)) { - *msg = "did not have exactly one end ID"; - return 0; + struct capvm_program *program = get_program_copy(testcase); + + if (verbosity > 2) { + fsm_capvm_program_dump(stderr, program); } - gres = fsm_getendids(fsm, end_state, 1, id_buf, &written); - if (gres != FSM_GETENDIDS_FOUND) { - *msg = "failed to get end IDs"; - return 0; + fsm_capvm_program_exec(program, (const uint8_t *)testcase->input, input_length, + capture_buf, capture_buf_length); + + bool dump_captures = false; + enum captest_run_case_res res = CAPTEST_RUN_CASE_PASS; + + /* check captures against expected */ + for (size_t i = 0; i < testcase->expected.count; i++) { + if (testcase->expected.captures[i].pos[0] != capture_buf[i].pos[0] || + testcase->expected.captures[i].pos[1] != capture_buf[i].pos[1]) { + res = CAPTEST_RUN_CASE_FAIL; + dump_captures = true; + } } - if (expected_end_id != id_buf[0]) { - *msg = "failed to get expected end ID"; - return 0; + if (dump_captures) { + for (size_t i = 0; i < testcase->expected.count; i++) { + printf("exp %zd, %zd, got %zd, %zd%s\n", 
+ testcase->expected.captures[i].pos[0], + testcase->expected.captures[i].pos[1], + capture_buf[i].pos[0], capture_buf[i].pos[1], + (testcase->expected.captures[i].pos[0] != capture_buf[i].pos[0] || + testcase->expected.captures[i].pos[1] != capture_buf[i].pos[1]) + ? " *" : ""); + } } - return 1; + return res; } diff --git a/tests/capture/captest.h b/tests/capture/captest.h index a9debff6c..53b30cec0 100644 --- a/tests/capture/captest.h +++ b/tests/capture/captest.h @@ -1,56 +1,123 @@ /* - * Copyright 2020 Scott Vokes + * Copyright 2022 Scott Vokes * * See LICENCE for the full copyright terms. */ + #ifndef CAPTEST_H #define CAPTEST_H #include #include #include +#include +#include #include #include #include #include +#include -#define MAX_SINGLE_FSM_TEST_PATHS 8 -#define MAX_TEST_CAPTURES 8 +#include -#define CAPTEST_RUN_SINGLE_LOG 0 +/* for captest_run_case_program, to evaluate handwritten programs */ +#include "../../src/libfsm/capture_vm_program.h" +#include "../../src/libfsm/capture_vm.h" -#ifndef LOG_INTERMEDIATE_FSMS -#define LOG_INTERMEDIATE_FSMS 0 -#endif +#define MAX_CAPTEST_SINGLE_CAPTURE_PAIRS 16 +#define MAX_CAPTEST_MULTI_CAPTURE_PAIRS 16 +#define MAX_CAPTEST_PROGRAM_CAPTURE_PAIRS 16 + +/* position representing no match */ +#define POS_NONE { (size_t)-1, (size_t)-1 } + +/* If verbosity is set to this (with -vvvvvvvvv) then dump all the + * compiled programs to 'prog_output'. 
*/ +#define DUMP_PROGRAMS_VERBOSITY 9 -struct captest_single_fsm_test_info { - const char *string; - struct captest_single_fsm_test_path { - fsm_state_t start; - fsm_state_t end; - } paths[MAX_SINGLE_FSM_TEST_PATHS]; +enum captest_match { + SHOULD_MATCH = 0, /* implied, set by designated initializer */ + SHOULD_NOT_MATCH = 1, + SHOULD_REJECT_AS_UNSUPPORTED = 2, + SHOULD_SKIP = 3, }; -struct captest_input { - const char *string; - size_t pos; +struct captest_case_single { + const char *regex; + const char *input; + enum captest_match match; + bool no_nl; /* do not retry with trailing newline */ + + size_t count; + struct fsm_capture expected[MAX_CAPTEST_SINGLE_CAPTURE_PAIRS]; }; -int -captest_run_single(const struct captest_single_fsm_test_info *info); +/* Same as captest_case_single, but these expect multiple (possibly overlapping) + * regexes to be combined before checking the match/capture behavior. */ +#define MAX_REGEXES 4 +#define MAX_INPUTS 8 +#define MAX_CAPTEST_MULTI_EXPECTED 8 +struct captest_case_multi { + uint8_t regex_count; + const char *regexes[MAX_REGEXES]; + enum captest_match match; + bool no_nl; + + struct multi_case_input_info { + const char *input; /* first NULL input = end of list */ + struct case_multi_expected { + uint8_t regex; /* expected: ascending order */ + uint8_t capture; /* 0 is default */ + size_t pos[2]; + } expected[MAX_CAPTEST_MULTI_EXPECTED]; + } inputs[MAX_INPUTS]; +}; -int -captest_getc(void *opaque); +struct captest_case_multi_result { + size_t pass; + size_t fail; +}; + +#define MAX_PROGRAM_CHAR_CLASSES 4 +#define MAX_PROGRAM_OPS 32 +struct captest_case_program { + const char *input; + + struct capvm_char_class char_class[MAX_PROGRAM_CHAR_CLASSES]; + + struct { + uint32_t count; + uint32_t base; + struct fsm_capture captures[MAX_CAPTEST_PROGRAM_CAPTURE_PAIRS]; + } expected; + + /* termined by 0'd record, { .t == CAPVM_OP_CHAR, .u.chr = 0x00 } */ + struct capvm_opcode ops[MAX_PROGRAM_OPS]; +}; + +enum captest_run_case_res { + 
CAPTEST_RUN_CASE_PASS, + CAPTEST_RUN_CASE_FAIL, + CAPTEST_RUN_CASE_ERROR, +}; +enum captest_run_case_res +captest_run_case(const struct captest_case_single *testcase, + int verbosity, bool trailing_newline, FILE *prog_output); -struct fsm * -captest_fsm_with_options(void); +enum captest_run_case_res +captest_run_case_multi(const struct captest_case_multi *testcase, + int verbosity, bool trailing_newline, FILE *prog_output, + struct captest_case_multi_result *result); -struct fsm * -captest_fsm_of_string(const char *string, unsigned end_id); +/* This should probably only be used for evaluating specific + * hand-written programs for development, because we only care + * about supporting the kinds of programs that could be produced + * by compiling from valid regexes. In other words, this is not + * a stable public interface. */ +enum captest_run_case_res +captest_run_case_program(const struct captest_case_program *testcase, + int verbosity); -int -captest_check_single_end_id(const struct fsm *fsm, fsm_state_t end_state, - unsigned expected_end_id, const char **msg); #endif diff --git a/tests/capture/capture0.c b/tests/capture/capture0.c deleted file mode 100644 index 4e7d0e3fa..000000000 --- a/tests/capture/capture0.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright 2020 Scott Vokes - * - * See LICENCE for the full copyright terms. - */ - -#include -#include -#include -#include - -#include -#include - -#include "captest.h" - -/* /a(bcd)e/ */ - -int main(void) { - struct captest_single_fsm_test_info test_info = { - "abcde", - { - { 1, 4 }, - } - }; - return captest_run_single(&test_info); -} diff --git a/tests/capture/capture1.c b/tests/capture/capture1.c deleted file mode 100644 index e9fe53ab9..000000000 --- a/tests/capture/capture1.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright 2020 Scott Vokes - * - * See LICENCE for the full copyright terms. 
- */ - -#include -#include -#include -#include - -#include -#include - -#include "captest.h" -/* (a(b(c))) */ - -int main(void) { - struct captest_single_fsm_test_info test_info = { - "abc", - { - { 0, 3 }, - { 1, 3 }, - { 2, 3 }, - } - }; - return captest_run_single(&test_info); -} diff --git a/tests/capture/capture2.c b/tests/capture/capture2.c deleted file mode 100644 index 20a1c1bac..000000000 --- a/tests/capture/capture2.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright 2020 Scott Vokes - * - * See LICENCE for the full copyright terms. - */ - -#include -#include -#include -#include - -#include -#include - -#include "captest.h" - -/* (a(b((c))(d))) */ - -int main(void) { - struct captest_single_fsm_test_info test_info = { - "abcd", - { - { 0, 4 }, - { 1, 4 }, - { 2, 3 }, - { 2, 3 }, - { 3, 4 }, - } - }; - return captest_run_single(&test_info); -} diff --git a/tests/capture/capture3.c b/tests/capture/capture3.c deleted file mode 100644 index 9d4d284ab..000000000 --- a/tests/capture/capture3.c +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Copyright 2020 Scott Vokes - * - * See LICENCE for the full copyright terms. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "captest.h" - -/* Combine 3 fully disjoint FSMs: - * - * - 0: "(a(b))" - * - 1: "(cd(e))" - * - 2: "(fgh(i))" - * - * Shift the captures for 1 and 2 forward and use/combine - * opaques on them to track which one(s) matched. - * - * This tracking of which DFA matched should be more directly - * supported by the API later. 
*/ - -static void -check(const struct fsm *fsm, const char *string, - unsigned end_id, unsigned capture_base); - -static void -det_and_min(const char *tag, struct fsm *fsm); - -int main(void) { - struct fsm *f_ab = captest_fsm_of_string("ab", 0); - struct fsm *f_cde = captest_fsm_of_string("cde", 1); - struct fsm *f_fghi = captest_fsm_of_string("fghi", 2); - struct fsm *f_all = NULL; - unsigned captures; - - struct fsm_combined_base_pair bases[3]; - struct fsm *fsms[3]; - - assert(f_ab); - assert(f_cde); - assert(f_fghi); - - /* set captures */ -#define SET_CAPTURE(FSM, STATE, CAPTURE, TYPE) \ - if (!fsm_set_capture_action(FSM, STATE, CAPTURE, TYPE)) { \ - fprintf(stderr, "failed to set capture on line %d\n", \ - __LINE__); \ - exit(EXIT_FAILURE); \ - } - - /* (a(b)) */ - if (!fsm_capture_set_path(f_ab, 0, 0, 2)) { - exit(EXIT_FAILURE); - } - if (!fsm_capture_set_path(f_ab, 1, 1, 2)) { - exit(EXIT_FAILURE); - } - - /* (cd(e)) */ - if (!fsm_capture_set_path(f_cde, 0, 0, 3)) { - exit(EXIT_FAILURE); - } - if (!fsm_capture_set_path(f_cde, 1, 2, 3)) { - exit(EXIT_FAILURE); - } - - /* (fgh(i)) */ - if (!fsm_capture_set_path(f_fghi, 0, 0, 4)) { - exit(EXIT_FAILURE); - } - if (!fsm_capture_set_path(f_fghi, 1, 3, 4)) { - exit(EXIT_FAILURE); - } - -#if LOG_INTERMEDIATE_FSMS - fprintf(stderr, "\n=== f_ab...\n"); - fsm_print_fsm(stderr, f_ab); - fsm_capture_dump(stderr, "#### f_ab", f_ab); - - fprintf(stderr, "\n=== f_cde...\n"); - fsm_print_fsm(stderr, f_cde); - fsm_capture_dump(stderr, "#### f_cde", f_cde); - - fprintf(stderr, "\n=== f_fghi...\n"); - fsm_print_fsm(stderr, f_fghi); - fsm_capture_dump(stderr, "#### f_fghi", f_fghi); -#endif - - /* determinise and minimise each before unioning */ - det_and_min("ab", f_ab); - det_and_min("cde", f_cde); - det_and_min("fghi", f_fghi); - - /* union them */ - fsms[0] = f_ab; - fsms[1] = f_cde; - fsms[2] = f_fghi; - - f_all = fsm_union_array(3, fsms, bases); - assert(f_all != NULL); - -#if LOG_INTERMEDIATE_FSMS - fprintf(stderr, "=== 
unioned f_ab with f_cde... (CB ab: %u, cde: %u)\n", - bases[0].capture, bases[1].capture); - fsm_print_fsm(stderr, f_all); - fsm_capture_dump(stderr, "#### f_all", f_all); -#endif - -#if LOG_INTERMEDIATE_FSMS - fprintf(stderr, "=== unioned f_all with f_fghi... (CB fghi: %u), %u captures\n", - bases[2].capture, fsm_countcaptures(f_all)); - fsm_print_fsm(stderr, f_all); - fsm_capture_dump(stderr, "#### f_all #2", f_all); -#endif - - if (!fsm_determinise(f_all)) { - fprintf(stderr, "NOPE %d\n", __LINE__); - exit(EXIT_FAILURE); - } - -#if LOG_INTERMEDIATE_FSMS - fprintf(stderr, "==== after determinise\n"); - fsm_print_fsm(stderr, f_all); - fsm_capture_dump(stderr, "#### f_all", f_all); -#endif - - captures = fsm_countcaptures(f_all); - if (captures != 6) { - fprintf(stderr, "expected 6 captures, got %u\n", captures); - exit(EXIT_FAILURE); - } - - check(f_all, "ab", 0, bases[0].capture); - check(f_all, "cde", 1, bases[1].capture); - check(f_all, "fghi", 2, bases[2].capture); - - - fsm_free(f_all); - - return 0; -} - -static void -det_and_min(const char *tag, struct fsm *fsm) -{ - if (!fsm_determinise(fsm)) { - fprintf(stderr, "Failed to determise '%s'\n", tag); - exit(EXIT_FAILURE); - } - - if (!fsm_minimise(fsm)) { - fprintf(stderr, "Failed to minimise '%s'\n", tag); - exit(EXIT_FAILURE); - } - -#if LOG_INTERMEDIATE_FSMS - fprintf(stderr, "==== after det_and_min: '%s'\n", tag); - fsm_print_fsm(stderr, fsm); - fsm_capture_dump(stderr, tag, fsm); -#endif - -} - -static void -check(const struct fsm *fsm, const char *string, - unsigned end_id, unsigned capture_base) -{ - int exec_res; - size_t i; - struct captest_input input; - fsm_state_t end; - struct fsm_capture captures[MAX_TEST_CAPTURES]; - const size_t length = strlen(string); - const unsigned cb = capture_base; /* alias */ - - input.string = string; - input.pos = 0; - - for (i = 0; i < MAX_TEST_CAPTURES; i++) { - captures[i].pos[0] = FSM_CAPTURE_NO_POS; - captures[i].pos[1] = FSM_CAPTURE_NO_POS; - } - - exec_res = 
fsm_exec(fsm, captest_getc, &input, &end, captures); - if (exec_res != 1) { - fprintf(stderr, "fsm_exec: %d for '%s', expected 1\n", - exec_res, string); - exit(EXIT_FAILURE); - } - - /* check end ID */ - { - const char *msg; - if (!captest_check_single_end_id(fsm, end, end_id, &msg)) { - fprintf(stderr, "%s\n", msg); - exit(EXIT_FAILURE); - } - } - - /* check captures */ - if (0) { - fprintf(stderr, "captures for '%s' (cb %u): [%ld, %ld], [%ld, %ld]\n", - string, capture_base, - captures[0 + cb].pos[0], captures[0 + cb].pos[1], - captures[1 + cb].pos[0], captures[1 + cb].pos[1]); - } - - assert(captures[0 + cb].pos[0] == 0); - assert(captures[0 + cb].pos[1] == length); - assert(captures[1 + cb].pos[0] == length - 1); - assert(captures[1 + cb].pos[1] == length); -} diff --git a/tests/capture/capture4.c b/tests/capture/capture4.c deleted file mode 100644 index 170cbe8b0..000000000 --- a/tests/capture/capture4.c +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Copyright 2020 Scott Vokes - * - * See LICENCE for the full copyright terms. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "captest.h" - -/* Combine 2 mostly overlapping FSMs: - * - 0: "(abc)" - * - 1: "(ab*c)" - * and check for false positives in the match. 
- */ - -static struct fsm * -build_and_combine(unsigned *cb_a, unsigned *cb_b); - -static void -det_and_min(const char *tag, struct fsm *fsm); - -static struct fsm * -build_ab_c(void); - -static void -check(const struct fsm *fsm, const char *string, - unsigned expected_ends, - unsigned cb_a, size_t pa_0, size_t pa_1, - unsigned cb_b, size_t pb_0, size_t pb_1); - -int main(void) { - unsigned cb_abc, cb_ab_c; - struct fsm *f_all = build_and_combine(&cb_abc, &cb_ab_c); - unsigned captures; - const unsigned exp_0 = 1U << 0; - const unsigned exp_1 = 1U << 1; - - captures = fsm_countcaptures(f_all); - if (captures != 2) { - fprintf(stderr, "expected 2 captures, got %u\n", captures); - exit(EXIT_FAILURE); - } - - #define NO_POS FSM_CAPTURE_NO_POS - check(f_all, "abc", /* captures 0 and 1 */ - exp_0 | exp_1, - cb_abc, 0, 3, - cb_ab_c, 0, 3); - check(f_all, "ac", /* only capture 1 */ - exp_1, - cb_abc, NO_POS, NO_POS, - cb_ab_c, 0, 2); - check(f_all, "abbc", /* only capture 1 */ - exp_1, - cb_abc, NO_POS, NO_POS, - cb_ab_c, 0, 4); - - fsm_free(f_all); - - return 0; -} - -static struct fsm * -build_and_combine(unsigned *cb_a, unsigned *cb_b) -{ - struct fsm *f_abc = captest_fsm_of_string("abc", 0); - struct fsm *f_ab_c = build_ab_c(); - struct fsm *f_all; - struct fsm_combine_info ci; - - assert(f_abc); - assert(f_ab_c); - - if (!fsm_capture_set_path(f_abc, 0, 0, 3)) { - exit(EXIT_FAILURE); - } - if (!fsm_capture_set_path(f_ab_c, 0, 0, 3)) { - exit(EXIT_FAILURE); - } - -#if LOG_INTERMEDIATE_FSMS - fprintf(stderr, "==================== abc \n"); - fsm_print_fsm(stderr, f_abc); - fsm_capture_dump(stderr, "abc", f_abc); - - fprintf(stderr, "==================== ab*c \n"); - fsm_print_fsm(stderr, f_ab_c); - fsm_capture_dump(stderr, "ab*c", f_ab_c); -#endif - - det_and_min("abc", f_abc); - det_and_min("ab*c", f_ab_c); - - /* union them */ - f_all = fsm_union(f_abc, f_ab_c, &ci); - assert(f_all != NULL); - - *cb_a = ci.capture_base_a; - *cb_b = ci.capture_base_b; - -#if 
LOG_INTERMEDIATE_FSMS - fprintf(stderr, "==================== post-union \n"); - fsm_print_fsm(stderr, f_all); - fsm_capture_dump(stderr, "capture_actions", f_all); - fprintf(stderr, "====================\n"); -#endif - - if (!fsm_determinise(f_all)) { - exit(EXIT_FAILURE); - } - -#if LOG_INTERMEDIATE_FSMS - fprintf(stderr, "==================== post-det \n"); - fsm_print_fsm(stderr, f_all); - fsm_capture_dump(stderr, "capture_actions", f_all); - fprintf(stderr, "====================\n"); -#endif - - return f_all; -} - -static void -det_and_min(const char *tag, struct fsm *fsm) -{ - if (!fsm_determinise(fsm)) { - fprintf(stderr, "Failed to determise '%s'\n", tag); - exit(EXIT_FAILURE); - } - - if (!fsm_minimise(fsm)) { - fprintf(stderr, "Failed to minimise '%s'\n", tag); - exit(EXIT_FAILURE); - } - -#if LOG_INTERMEDIATE_FSMS - fprintf(stderr, "==== after det_and_min: '%s'\n", tag); - fsm_print_fsm(stderr, fsm); - fsm_capture_dump(stderr, tag, fsm); -#endif - -} - -static struct fsm * -build_ab_c(void) -{ - struct fsm *fsm = captest_fsm_with_options(); - assert(fsm != NULL); - - if (!fsm_addstate_bulk(fsm, 4)) { goto fail; } - - fsm_setstart(fsm, 0); - if (!fsm_addedge_literal(fsm, 0, 1, 'a')) { goto fail; } - - if (!fsm_addedge_literal(fsm, 1, 2, 'b')) { goto fail; } - if (!fsm_addedge_literal(fsm, 1, 3, 'c')) { goto fail; } - - if (!fsm_addedge_literal(fsm, 2, 2, 'b')) { goto fail; } - if (!fsm_addedge_literal(fsm, 2, 3, 'c')) { goto fail; } - - fsm_setend(fsm, 3, 1); - if (!fsm_setendid(fsm, 1)) { - goto fail; - } - - return fsm; - -fail: - exit(EXIT_FAILURE); -} - -static void -check(const struct fsm *fsm, const char *string, - unsigned expected_ends, - unsigned cb_a, size_t pa_0, size_t pa_1, - unsigned cb_b, size_t pb_0, size_t pb_1) -{ - int exec_res; - size_t i; - struct captest_input input; - fsm_state_t end; - struct fsm_capture captures[MAX_TEST_CAPTURES]; - - fprintf(stderr, "#### check '%s', exp: ends 0x%u, c%u: (%ld, %ld), c%u: %ld, %ld)\n", - string, 
expected_ends, - cb_a, pa_0, pa_1, - cb_b, pb_0, pb_1); - - input.string = string; - input.pos = 0; - - for (i = 0; i < MAX_TEST_CAPTURES; i++) { - captures[i].pos[0] = FSM_CAPTURE_NO_POS; - captures[i].pos[1] = FSM_CAPTURE_NO_POS; - } - - exec_res = fsm_exec(fsm, captest_getc, &input, &end, captures); - if (exec_res != 1) { - fprintf(stderr, "fsm_exec: %d\n", exec_res); - exit(EXIT_FAILURE); - } - - /* check captures */ - fprintf(stderr, "captures for '%s': [%ld, %ld], [%ld, %ld]\n", - string, - captures[0].pos[0], captures[0].pos[1], - captures[1].pos[0], captures[1].pos[1]); - assert(captures[cb_a].pos[0] == pa_0); - assert(captures[cb_a].pos[1] == pa_1); - assert(captures[cb_b].pos[0] == pb_0); - assert(captures[cb_b].pos[1] == pb_1); - - { - enum fsm_getendids_res gres; - fsm_end_id_t id_buf[2]; - size_t written; - gres = fsm_getendids(fsm, end, 2, id_buf, &written); - if (gres != FSM_GETENDIDS_FOUND) { - assert(!"fsm_getendids failed"); - } - - if (expected_ends == 0x2) { - assert(written == 1); - assert(id_buf[0] == 1); - } else if (expected_ends == 0x3) { - assert(written == 2); - assert(id_buf[0] == 0); - assert(id_buf[1] == 1); - } else { - assert(!"test not handled"); - } - } -} diff --git a/tests/capture/capture5.c b/tests/capture/capture5.c deleted file mode 100644 index b3a4be3ee..000000000 --- a/tests/capture/capture5.c +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright 2020 Scott Vokes - * - * See LICENCE for the full copyright terms. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#define LOG_INTERMEDIATE_FSMS 0 -#include "captest.h" - -/* Check that self edges are handled properly in the - * capture action analysis. - * - * The DFA corresponds to /a(b*)(c)/. 
*/ - -static struct fsm * -build(void); - -static void -check(struct fsm *f, const char *input, - unsigned pa_0, unsigned pa_1, - unsigned pb_0, unsigned pb_1); - -int main(void) { - struct fsm *f = build(); - unsigned captures; - assert(f != NULL); - - captures = fsm_countcaptures(f); - if (captures != 2) { - fprintf(stderr, "expected 2 captures, got %u\n", captures); - exit(EXIT_FAILURE); - } - - check(f, "ac", - 1, 1, - 1, 2); - check(f, "abc", - 1, 2, - 2, 3); - check(f, "abbc", - 1, 3, - 3, 4); - - fsm_free(f); - - return 0; -} - -static struct fsm * -build(void) -{ - struct fsm *fsm = captest_fsm_with_options(); - - if (!fsm_addstate_bulk(fsm, 4)) { goto fail; } - - fsm_setstart(fsm, 0); - if (!fsm_addedge_literal(fsm, 0, 1, 'a')) { goto fail; } - - if (!fsm_addedge_literal(fsm, 1, 1, 'b')) { goto fail; } - if (!fsm_addedge_literal(fsm, 1, 2, 'c')) { goto fail; } - - fsm_setend(fsm, 2, 1); - - if (!fsm_capture_set_path(fsm, 0, 1, 1)) { goto fail; } - if (!fsm_capture_set_path(fsm, 1, 1, 2)) { goto fail; } - -#if LOG_INTERMEDIATE_FSMS - fprintf(stderr, "==== built\n"); - fsm_print_fsm(stderr, fsm); - fsm_capture_dump(stderr, "built", fsm); -#endif - - if (!fsm_determinise(fsm)) { - fprintf(stderr, "Failed to determise\n"); - exit(EXIT_FAILURE); - } - -#if LOG_INTERMEDIATE_FSMS - fprintf(stderr, "==== after det\n"); - fsm_print_fsm(stderr, fsm); - fsm_capture_dump(stderr, "after det", fsm); -#endif - - if (!fsm_minimise(fsm)) { - fprintf(stderr, "Failed to minimise\n"); - exit(EXIT_FAILURE); - } - -#if LOG_INTERMEDIATE_FSMS - fprintf(stderr, "==== after min\n"); - fsm_print_fsm(stderr, fsm); - fsm_capture_dump(stderr, "after min", fsm); -#endif - return fsm; - -fail: - exit(EXIT_FAILURE); -} - -static void -check(struct fsm *fsm, const char *string, - unsigned pa_0, unsigned pa_1, - unsigned pb_0, unsigned pb_1) -{ - int exec_res; - size_t i; - struct captest_input input; - fsm_state_t end; - struct fsm_capture captures[MAX_TEST_CAPTURES]; - - fprintf(stderr, 
"#### check '%s', exp: c%u: (%u, %u), c%u: %u, %u)\n", - string, - 0, pa_0, pa_1, - 1, pb_0, pb_1); - - input.string = string; - input.pos = 0; - - for (i = 0; i < MAX_TEST_CAPTURES; i++) { - captures[i].pos[0] = FSM_CAPTURE_NO_POS; - captures[i].pos[1] = FSM_CAPTURE_NO_POS; - } - - exec_res = fsm_exec(fsm, captest_getc, &input, &end, captures); - if (exec_res != 1) { - fprintf(stderr, "fsm_exec: %d\n", exec_res); - exit(EXIT_FAILURE); - } - - /* check captures */ - fprintf(stderr, "captures for '%s': [%ld, %ld], [%ld, %ld]\n", - string, - captures[0].pos[0], captures[0].pos[1], - captures[1].pos[0], captures[1].pos[1]); - assert(captures[0].pos[0] == pa_0); - assert(captures[0].pos[1] == pa_1); - assert(captures[1].pos[0] == pb_0); - assert(captures[1].pos[1] == pb_1); -} diff --git a/tests/capture/capture_concat1.c b/tests/capture/capture_concat1.c deleted file mode 100644 index ee9c8aaab..000000000 --- a/tests/capture/capture_concat1.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright 2020 Scott Vokes - * - * See LICENCE for the full copyright terms. 
- */ - -#include -#include -#include -#include - -#include -#include -#include - -#include "captest.h" - -/* concat /(ab)/ and /(cde)/ */ - -static struct fsm * -build(unsigned *cb_a, unsigned *cb_b); - -static void -check(const struct fsm *fsm, const char *input, unsigned end_id, - unsigned cb_ab, size_t exp_start_ab, size_t exp_end_ab, - unsigned cb_cde, size_t exp_start_cde, size_t exp_end_cde); - -int main(void) { - unsigned cb_ab, cb_cde; /* capture base */ - struct fsm *abcde = build(&cb_ab, &cb_cde); - - check(abcde, "abcde", 1, - cb_ab, 0, 2, - cb_cde, 2, 5); - - fsm_free(abcde); - - return EXIT_SUCCESS; -} - -static struct fsm * -build(unsigned *cb_a, unsigned *cb_b) -{ - struct fsm *ab = captest_fsm_of_string("ab", 0); - struct fsm *cde = captest_fsm_of_string("cde", 1); - struct fsm *abcde; - struct fsm_combine_info ci; - size_t cc_ab, cc_cde, cc_abcde; - - assert(ab); - assert(cde); - - if (!fsm_capture_set_path(ab, 0, 0, 2)) { - assert(!"path 0"); - } - if (!fsm_capture_set_path(cde, 0, 0, 3)) { - assert(!"path 1"); - } - - cc_ab = fsm_countcaptures(ab); - assert(cc_ab == 1); - - cc_cde = fsm_countcaptures(cde); - assert(cc_cde == 1); - - abcde = fsm_concat(ab, cde, &ci); - assert(abcde); - *cb_a = ci.capture_base_a; - *cb_b = ci.capture_base_b; - - cc_abcde = fsm_countcaptures(abcde); - assert(cc_abcde == cc_ab + cc_cde); - -#if LOG_INTERMEDIATE_FSMS - fprintf(stderr, "==== after concat: cb_ab %u, cb_cde %u\n", - *cb_a, *cb_b); - fsm_print_fsm(stderr, abcde); - - fsm_capture_dump(stderr, "#### after concat", abcde); - - fprintf(stderr, "==== determinise\n"); -#endif - - if (!fsm_determinise(abcde)) { - assert(!"determinise"); - } - -#if LOG_INTERMEDIATE_FSMS - fprintf(stderr, "==== after determinise\n"); - fsm_print_fsm(stderr, abcde); - - assert(fsm_countcaptures(abcde) == cc_abcde); - - fsm_capture_dump(stderr, "#### after det", abcde); -#endif - - assert(fsm_countcaptures(abcde) == cc_abcde); - return abcde; -} - -static void -check(const struct 
fsm *fsm, const char *input, unsigned end_id, - unsigned cb_ab, size_t exp_start_ab, size_t exp_end_ab, - unsigned cb_cde, size_t exp_start_cde, size_t exp_end_cde) -{ - struct captest_input ci; - fsm_state_t end; - int exec_res; - struct fsm_capture captures[MAX_TEST_CAPTURES]; - - ci.string = input; - ci.pos = 0; - - exec_res = fsm_exec(fsm, captest_getc, &ci, &end, captures); - if (exec_res != 1) { - fprintf(stderr, "exec_res: %d\n", exec_res); - exit(EXIT_FAILURE); - } - - { - const char *msg; - if (!captest_check_single_end_id(fsm, end, end_id, &msg)) { - fprintf(stderr, "%s\n", msg); - exit(EXIT_FAILURE); - } - } - - assert(captures[cb_ab].pos[0] == exp_start_ab); - assert(captures[cb_ab].pos[1] == exp_end_ab); - - assert(captures[cb_cde].pos[0] == exp_start_cde); - assert(captures[cb_cde].pos[1] == exp_end_cde); -} diff --git a/tests/capture/capture_concat2.c b/tests/capture/capture_concat2.c deleted file mode 100644 index a8f070c7e..000000000 --- a/tests/capture/capture_concat2.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright 2020 Scott Vokes - * - * See LICENCE for the full copyright terms. 
- */ - -#include -#include -#include -#include - -#include -#include -#include - -#include "captest.h" - -/* concat /(abc)/ and /(de)/ */ - -static struct fsm * -build(unsigned *cb_a, unsigned *cb_b); - -static void -check(const struct fsm *fsm, const char *input, unsigned end_id, - unsigned cb_ab, size_t exp_start_ab, size_t exp_end_ab, - unsigned cb_cde, size_t exp_start_cde, size_t exp_end_cde); - -int main(void) { - unsigned cb_abc, cb_de; /* capture base */ - struct fsm *abcde = build(&cb_abc, &cb_de); - - check(abcde, "abcde", 1, - cb_abc, 0, 3, - cb_de, 3, 5); - - fsm_free(abcde); - - return EXIT_SUCCESS; -} - -static struct fsm * -build(unsigned *cb_a, unsigned *cb_b) -{ - struct fsm *abc = captest_fsm_of_string("abc", 0); - struct fsm *de = captest_fsm_of_string("de", 1); - struct fsm *abcde; - struct fsm_combine_info ci; - size_t cc_abc, cc_de, cc_abcde; - - assert(abc); - assert(de); - - if (!fsm_capture_set_path(abc, 0, 0, 3)) { - assert(!"path 0"); - } - if (!fsm_capture_set_path(de, 0, 0, 2)) { - assert(!"path 1"); - } - - cc_abc = fsm_countcaptures(abc); - assert(cc_abc == 1); - - cc_de = fsm_countcaptures(de); - assert(cc_de == 1); - - abcde = fsm_concat(abc, de, &ci); - assert(abcde); - *cb_a = ci.capture_base_a; - *cb_b = ci.capture_base_b; - - cc_abcde = fsm_countcaptures(abcde); - assert(cc_abcde == cc_abc + cc_de); - -#if LOG_INTERMEDIATE_FSMS - fprintf(stderr, "==== after concat: cb_abc %u, cb_de %u\n", - *cb_a, *cb_b); - fsm_print_fsm(stderr, abcde); - - fsm_capture_dump(stderr, "#### after concat", abcde); - - fprintf(stderr, "==== determinise\n"); -#endif - - if (!fsm_determinise(abcde)) { - assert(!"determinise"); - } - -#if LOG_INTERMEDIATE_FSMS - fprintf(stderr, "==== after determinise\n"); - fsm_print_fsm(stderr, abcde); - - assert(fsm_countcaptures(abcde) == cc_abcde); - - fsm_capture_dump(stderr, "#### after det", abcde); -#endif - - assert(fsm_countcaptures(abcde) == cc_abcde); - return abcde; -} - -static void -check(const struct 
fsm *fsm, const char *input, unsigned end_id, - unsigned cb_abc, size_t exp_start_abc, size_t exp_end_abc, - unsigned cb_de, size_t exp_start_de, size_t exp_end_de) -{ - struct captest_input ci; - fsm_state_t end; - int exec_res; - struct fsm_capture captures[MAX_TEST_CAPTURES]; - - ci.string = input; - ci.pos = 0; - - exec_res = fsm_exec(fsm, captest_getc, &ci, &end, captures); - if (exec_res != 1) { - fprintf(stderr, "exec_res: %d\n", exec_res); - exit(EXIT_FAILURE); - } - - { - const char *msg; - if (!captest_check_single_end_id(fsm, end, end_id, &msg)) { - fprintf(stderr, "%s\n", msg); - exit(EXIT_FAILURE); - } - } - - assert(captures[cb_abc].pos[0] == exp_start_abc); - assert(captures[cb_abc].pos[1] == exp_end_abc); - - assert(captures[cb_de].pos[0] == exp_start_de); - assert(captures[cb_de].pos[1] == exp_end_de); -} diff --git a/tests/capture/capture_long_trail.c b/tests/capture/capture_long_trail.c deleted file mode 100644 index 349717b0f..000000000 --- a/tests/capture/capture_long_trail.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright 2020 Scott Vokes - * - * See LICENCE for the full copyright terms. - */ - -#include -#include -#include -#include - -#include -#include - -#include "captest.h" -/* a(bcdefghijklmnopqrstuvwxy)z - * This is long enough to exercise growing the trail for - * capture action analysis. 
*/ - -int main(void) { - struct captest_single_fsm_test_info test_info = { - "abcdefghijklmnopqrstuvwxyz", - { - { 1, 25 }, - } - }; - return captest_run_single(&test_info); -} diff --git a/tests/capture/capture_test_case_list.c b/tests/capture/capture_test_case_list.c new file mode 100644 index 000000000..6b72d1018 --- /dev/null +++ b/tests/capture/capture_test_case_list.c @@ -0,0 +1,1944 @@ +#include "captest.h" + +#include + +#define NO_POS FSM_CAPTURE_NO_POS + +const struct captest_case_single single_cases[] = { + { + .regex = "^", + .input = "", + .count = 1, .expected = { + { .pos = {0, 0}, }, + }, + }, + { + .regex = "$", + .input = "", + .count = 1, .expected = { + { .pos = {0, 0}, }, + }, + }, + { + .regex = "$^", + .input = "", + .count = 1, .expected = { + { .pos = {0, 0}, }, + }, + }, + { + .regex = "$^", .input = "x", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "()*", + .input = "", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "()*", + .input = "x", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "^$", + .input = "", + .count = 1, .expected = { + { .pos = {0, 0}, }, + }, + }, + { + .regex = "^($|($)|(($))|((($))))", + .input = "", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "^(((($)))|(($))|($)|$)", + .input = "", + .count = 5, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "^((((a$)))|((b$))|(c$)|d$)", + .input = "a", + .count = 5, .expected = { + { .pos = {0, 1}, }, + { .pos = {0, 1}, }, + { .pos = {0, 1}, }, + { .pos = {0, 1}, }, + { .pos = {0, 1}, }, + }, + }, + { + .regex = "^((((a$)))|((b$))|(c$)|d$)", + .input = "b", + .count = 7, .expected = { + { .pos = {0, 1}, }, + { .pos = {0, 1}, }, + { .pos = {-1, -1}, }, + { .pos = {-1, -1}, }, + { .pos = {-1, -1}, }, + { .pos = {0, 1}, }, + { .pos 
= {0, 1}, }, + }, + }, + { + .regex = "^((((b$)))|((b$))|(c$)|d$)", + .input = "b", + .count = 5, .expected = { + { .pos = {0, 1}, }, + { .pos = {0, 1}, }, + { .pos = {0, 1}, }, + { .pos = {0, 1}, }, + { .pos = {0, 1}, }, + }, + }, + { + .regex = "^((x?))*$", + .input = "x", + .count = 3, .expected = { + { .pos = {0, 1}, }, + { .pos = {1, 1}, }, + { .pos = {1, 1}, }, + }, + }, + { + .regex = "^((x?)*)*$", + .input = "", + .count = 3, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "^((x?)*)*$", + .input = "xxxxx", + .count = 3, .expected = { + { .pos = {0, 5}, }, + { .pos = {5, 5}, }, + { .pos = {5, 5}, }, + }, + }, + { + .regex = "xx*x", + .input = "xx", + .count = 1, .expected = { + { .pos = {0, 2}, }, + }, + }, + { + .regex = "^(x?)*$", + .input = "xx", + .count = 2, .expected = { + { .pos = {0, 2}, }, + { .pos = {2, 2}, }, + }, + }, + { + .regex = "^(x?)*$", + .input = "xxx", + .count = 2, .expected = { + { .pos = {0, 3}, }, + { .pos = {3, 3}, }, + }, + }, + { + .regex = "^(x?)+$", + .input = "xx", + .count = 2, .expected = { + { .pos = {0, 2}, }, + { .pos = {2, 2}, }, + }, + }, + { + .regex = "^(x?)+$", + .input = "xxx", + .count = 2, .expected = { + { .pos = {0, 3}, }, + { .pos = {3, 3}, }, + }, + }, + { + .regex = "^x(z?)*y$", + .input = "xy", + .count = 2, .expected = { + { .pos = {0, 2}, }, + { .pos = {1, 1}, }, + }, + }, + { + .regex = "()|x", + .input = "", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "()|x", + .input = "x", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "x|()", + .input = "x", + .count = 1, .expected = { + { .pos = {0, 1}, }, + }, + }, + { + .regex = "x|()", + .input = "", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "$|", + .input = "x", + .count = 1, .expected = { + { .pos = {0, 0}, }, + }, + }, + { + .regex = ".|$^", + .input 
= "", + .count = 1, .expected = { + { .pos = {0, 0}, }, + }, + }, + { + .regex = ".|$^", + .input = "x", + .count = 1, .expected = { + { .pos = {0, 1}, }, + }, + }, + { + .regex = "$^|.", + .input = "", + .count = 1, .expected = { + { .pos = {0, 0}, }, + }, + }, + { + .regex = "$^|.", + .input = "x", + .count = 1, .expected = { + { .pos = {0, 1}, }, + }, + }, + { + .regex = "$$$^|...", + .input = "xxx", + .count = 1, .expected = { + { .pos = {0, 3}, }, + }, + }, + { + .regex = "x?$x?^x?|x?$x?^x?", + .input = "", + .count = 1, .expected = { + { .pos = {0, 0}, }, + }, + }, + { + .regex = "[^x]", .input = "", + .no_nl = true, + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "[^x]", + .input = "\n", + .count = 1, .expected = { + { .pos = {0, 1}, }, + }, + }, + { + .regex = ".$()", + .input = "x", + .count = 2, .expected = { + { .pos = {0, 1}, }, + { .pos = {1, 1}, }, + }, + }, + { + .regex = ".$()", .input = "", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "^.$()", + .input = "x", + .count = 2, .expected = { + { .pos = {0, 1}, }, + { .pos = {1, 1}, }, + }, + }, + { + .regex = "^.$()", .input = "", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "$(x?)(y?)(z?)", + .input = "a", + .count = 4, .expected = { + { .pos = {1, 1}, }, + { .pos = {1, 1}, }, + { .pos = {1, 1}, }, + { .pos = {1, 1}, }, + }, + }, + { + .regex = ".$(x?)(y?)(z?)", + .input = "a", + .count = 4, .expected = { + { .pos = {0, 1}, }, + { .pos = {1, 1}, }, + { .pos = {1, 1}, }, + { .pos = {1, 1}, }, + }, + }, + { + .regex = "[^y]", + .input = "xx", + .count = 1, .expected = { + { .pos = {0, 1}, }, + }, + }, + { + .regex = ".", + .input = "xx", + .count = 1, .expected = { + { .pos = {0, 1}, }, + }, + }, + { + .regex = "(x)+", + .input = "xxx", + .count = 2, .expected = { + { .pos = {0, 3}, }, + { .pos = {2, 3}, }, + }, + }, + { + .regex = "^(x)*.", + .input = "xx", + .count = 2, .expected = { + { .pos = {0, 2}, }, + { .pos = {0, 1}, }, + }, + }, + { + .regex = "^(x)*.", + .input = "xy", + .count = 2, 
.expected = { + { .pos = {0, 2}, }, + { .pos = {0, 1}, }, + }, + }, + { + .regex = "a.b(c)*", + .input = "axbc", + .count = 2, .expected = { + { .pos = {0, 4}, }, + { .pos = {3, 4}, }, + }, + }, + { + .regex = "^x?^", + .input = "", + .count = 1, .expected = { + { .pos = {0, 0}, }, + }, + }, + { + .regex = "^x?^", + .input = "x", + .count = 1, .expected = { + { .pos = {0, 0}, }, + }, + }, + { + .regex = "$(^)", + .input = "", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "($)", + .input = "", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "($$$)", + .input = "", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "$x?^", .input = "x", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "$(^)*", + .input = "", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "$(^)*", + .input = "x", + .count = 1, .expected = { + { .pos = {1, 1}, }, + }, + }, + { + .regex = "$()*", + .input = "", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "$()*", + .input = "x", + .count = 2, .expected = { + { .pos = {1, 1}, }, + { .pos = {1, 1}, }, + }, + }, + { + .regex = "^$^", .input = "x", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "$^$", .input = "x", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "$y?^x*", .input = "x", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "x|$^", + .input = "x", + .count = 1, .expected = { + { .pos = {0, 1}, }, + }, + }, + { + .regex = "x|$^", .input = "y", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "x|$^$^", + .input = "x", + .count = 1, .expected = { + { .pos = {0, 1}, }, + }, + }, + { + .regex = "x|$^$^", .input = "y", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "$^|x", + .input = "x", + .count = 1, .expected = { + { .pos = {0, 1}, }, + }, + }, + { + .regex = "$^|x", .input = "y", + .match = 
SHOULD_NOT_MATCH, + }, + { + .regex = "$^$^|x", + .input = "x", + .count = 1, .expected = { + { .pos = {0, 1}, }, + }, + }, + { + .regex = "$^$^|x", .input = "y", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "^$|.", + .input = "x", + .count = 1, .expected = { + { .pos = {0, 1}, }, + }, + }, + { + .regex = "x|^$^$", .input = "y", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "^$^$|x", .input = "y", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "$|^|a$", + .input = "x", + .count = 1, .expected = { + { .pos = {0, 0}, }, + }, + }, + { + .regex = "[^a]x", .input = "x", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "[^a]x", + .input = "xx", + .count = 1, .expected = { + { .pos = {0, 2}, }, + }, + }, + { + .regex = "a(b|c$)d", .input = "ac", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "a(^b|c)d", .input = "bd", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "(a|b|)*", + .input = "", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "xx*y$", + .input = "x_xxy", + .count = 1, .expected = { + { .pos = {2, 5}, }, + }, + }, + { + .regex = "(|.$)*", + .input = "x", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "(.$)*x", .input = "y", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "(.$)*", + .input = "x", + .count = 2, .expected = { + { .pos = {0, 1}, }, + { .pos = {0, 1}, }, + }, + }, + { + .regex = "^(|.$)*", + .input = "x", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "^(|.$)*$", + .input = "x", + .count = 2, .expected = { + { .pos = {0, 1}, }, + { .pos = {1, 1}, }, + }, + }, + { + .regex = "x|y(^)", .input = "", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "(?:x*.|^$).", .input = "", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "(?:x|^$)x", .input = "", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "()+x", + .input = "x", + .count = 2, .expected = { + { .pos = {0, 1}, }, + { .pos = {0, 0}, }, + 
}, + }, + { + .regex = "($$)^", + .input = "", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "$($|$a)", + .input = "", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "^(?i)abc$", + .input = "AbC", + .count = 1, .expected = { + { .pos = {0, 3}, }, + }, + }, + { + .regex = "^(?i)ab(?-i)c$", .input = "AbC", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "^(?i)ab(?-i)c$", + .input = "Abc", + .count = 1, .expected = { + { .pos = {0, 3}, }, + }, + }, + { + .regex = "^(?i)a[b]c$", + .input = "ABC", + .count = 1, .expected = { + { .pos = {0, 3}, }, + }, + }, + { + .regex = "^(?i)a[^b]c$", .input = "ABC", + .match = SHOULD_NOT_MATCH, + }, + { + .regex = "^(?i)a[bx]c$", + .input = "ABC", + .count = 1, .expected = { + { .pos = {0, 3}, }, + }, + }, + { + .regex = "^(?i)a[b-c]c$", + .input = "ABC", + .count = 1, .expected = { + { .pos = {0, 3}, }, + }, + }, + { + .regex = "(a()b)+a", + .input = "a!aba", + .count = 3, .expected = { + { .pos = {2, 5}, }, + { .pos = {2, 4}, }, + { .pos = {3, 3}, }, + }, + }, + { + .regex = "^^[^]]", + .input = "\n", + .count = 1, .expected = { + { .pos = {0, 1}, }, + }, + }, + { + .regex = "x(x()y)*", + .input = "xxy", + .count = 3, .expected = { + { .pos = {0, 3}, }, + { .pos = {1, 3}, }, + { .pos = {2, 2}, }, + }, + }, + { + .regex = "x(()x)*", + .input = "xx", + .count = 3, .expected = { + { .pos = {0, 2}, }, + { .pos = {1, 2}, }, + { .pos = {1, 1}, }, + }, + }, + { + .regex = "b(x*x*a()*y)*(a)a*", + .input = "ba", + .count = 4, .expected = { + { .pos = {0, 2}, }, + { .pos = {-1, -1}, }, + { .pos = {-1, -1}, }, + { .pos = {1, 2}, }, + }, + }, + { + .regex = "a(().x)*ab", + .input = "a.a.aaxab", + .count = 3, .expected = { + { .pos = {4, 9}, }, + { .pos = {5, 7}, }, + { .pos = {5, 5}, }, + }, + }, + { + .regex = "ab(b()*()*)*()*z", + .input = "a!abz", + .count = 5, .expected = { + { .pos = {2, 5}, }, + { .pos = {-1, -1}, }, + { .pos = {-1, -1}, 
}, + { .pos = {-1, -1}, }, + { .pos = {4, 4}, }, + }, + }, + { + .regex = "^x(y?z*)*$", + .input = "x", + .count = 2, .expected = { + { .pos = {0, 1}, }, + { .pos = {1, 1}, }, + }, + }, + { + .regex = "^(y?z*)*$", + .input = "", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "^(x|$x?)*$", + .input = "", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "^(^|$x)*$", + .input = "", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "((x?)*(x?)*(x?)*(x?)*(x?)*(x?)*(x?)*(x?)*(x?)*(x?)*(x?)*(x?)*(x?)*)*y$", + .input = "xxxxxxxxxxy", + .count = 15, .expected = { + { .pos = {0, 11}, }, + { .pos = {10, 10}, }, + { .pos = {10, 10}, }, + { .pos = {10, 10}, }, + { .pos = {10, 10}, }, + { .pos = {10, 10}, }, + { .pos = {10, 10}, }, + { .pos = {10, 10}, }, + { .pos = {10, 10}, }, + { .pos = {10, 10}, }, + { .pos = {10, 10}, }, + { .pos = {10, 10}, }, + { .pos = {10, 10}, }, + { .pos = {10, 10}, }, + { .pos = {10, 10}, }, + }, + }, + { + .regex = "^a$", + .input = "a", + .count = 1, .expected = { + { .pos = {0, 1}, }, + }, + }, + { + .regex = "^a(bcd)e$", + .input = "abcde", + .count = 2, .expected = { + { .pos = {0, 5}, }, + { .pos = {1, 4}, }, + }, + }, + { + .regex = "^(a(b((c))(d)))$", + .input = "abcd", + .count = 6, .expected = { + { .pos = {0, 4}, }, + { .pos = {0, 4}, }, + { .pos = {1, 4}, }, + { .pos = {2, 3}, }, + { .pos = {2, 3}, }, + { .pos = {3, 4}, }, + }, + }, + { + .regex = "^(a(b(c)))$", + .input = "abc", + .count = 4, .expected = { + { .pos = {0, 3}, }, + { .pos = {0, 3}, }, + { .pos = {1, 3}, }, + { .pos = {2, 3}, }, + }, + }, + { + .regex = "^a(b*)(c)$", + .input = "ac", + .count = 3, .expected = { + { .pos = {0, 2}, }, + { .pos = {1, 1}, }, + { .pos = {1, 2}, }, + }, + }, + { + .regex = "^a(b*)(c)$", + .input = "abc", + .count = 3, .expected = { + { .pos = {0, 3}, }, + { .pos = {1, 2}, }, + { .pos = {2, 3}, }, + }, + 
}, + { + .regex = "^a(b*)(c)$", + .input = "abbc", + .count = 3, .expected = { + { .pos = {0, 4}, }, + { .pos = {1, 3}, }, + { .pos = {3, 4}, }, + }, + }, + { + .regex = "^(ab*c)$", + .input = "ac", + .count = 2, .expected = { + { .pos = {0, 2}, }, + { .pos = {0, 2}, }, + }, + }, + { + .regex = "^(ab*c)$", + .input = "abc", + .count = 2, .expected = { + { .pos = {0, 3}, }, + { .pos = {0, 3}, }, + }, + }, + { + .regex = "^(ab*c)$", + .input = "abbc", + .count = 2, .expected = { + { .pos = {0, 4}, }, + { .pos = {0, 4}, }, + }, + }, + { + .regex = "^(a*)", + .input = "", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "^(a*)", + .input = "x", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "^(a*)", + .input = "a", + .count = 2, .expected = { + { .pos = {0, 1}, }, + { .pos = {0, 1}, }, + }, + }, + { + .regex = "^(a*)", + .input = "ax", + .count = 2, .expected = { + { .pos = {0, 1}, }, + { .pos = {0, 1}, }, + }, + }, + { + .regex = "^a*", + .input = "", + .count = 1, .expected = { + { .pos = {0, 0}, }, + }, + }, + { + .regex = "^a*", + .input = "a", + .count = 1, .expected = { + { .pos = {0, 1}, }, + }, + }, + { + .regex = "^a*", + .input = "ax", + .count = 1, .expected = { + { .pos = {0, 1}, }, + }, + }, + { + .regex = ".|", + .input = "", + .count = 1, .expected = { + { .pos = {0, 0}, }, + }, + }, + { + .regex = "()*^", + .input = "", + .count = 2, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "(((())))*^", + .input = "", + .count = 5, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + + { + .regex = "(x|(x|))^", + .input = "", + .count = 3, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = ".*(x|())^", + .input = "", + .count = 3, .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos 
= {0, 0}, }, + }, + }, + { + .regex = "(()|(()|x)^|x)^", + .input = "", + .count = 3, + .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + + { + .regex = "x^()()|()", + .input = "", + .count = 4, + .expected = { + { .pos = {0, 0}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "y^()|()^x", + .input = "x", + .count = 3, + .expected = { + { .pos = {0, 1}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "()$a|()", + .input = "", + .count = 3, + .expected = { + { .pos = {0, 0}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "()$z|(x)$", + .input = "x", + .count = 3, + .expected = { + { .pos = {0, 1}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {0, 1}, }, + }, + }, + + { + /* long enough to exercise the USE_COLLAPSED_ZERO_PREFIX optimization */ + .regex = "a*(ba*)c$", + .input = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaac", + .count = 2, + .expected = { + { .pos = {101, 303}, }, + { .pos = {201, 302}, }, + }, + }, + + /* regression: losing the first character on the transition from + * the unanchored start loop to the capture */ + { + .regex = "aa+b$", + .input = "aXaXaaab", + .count = 1, + .expected = { + { .pos = {4, 8}, }, + }, + }, + { + .regex = "aa*b$", + .input = "aXaXaaab", + .count = 1, + .expected = { + { .pos = {4, 8}, }, + }, + }, + { + .regex = "!!!+$", + .input = "!\"!\"!\"!!!!", + .count = 1, + .expected = { + { .pos = {6, 10}, }, + }, + }, + + /* new fuzzer regressions */ + { + /* PCRE does not set the first capture, which is unsatisfiable */ + .regex = "^(.^)*^(a*)", + .input = "", + .count = 3, + .expected = { + { .pos = 
{0, 0}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {0, 0}, }, + }, + }, + { + /* similar to the previous case, but with different anchoring */ + .regex = "(a)*(^)*^", + .input = "", + .count = 3, + .expected = { + { .pos = {0, 0}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "^(.a)*^(.a)", + .input = "!a", + .count = 3, + .expected = { + { .pos = {0, 2}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {0, 2}, }, + }, + }, + { + .regex = "(A)*^()*^", + .input = "", + .count = 3, + .expected = { + { .pos = {0, 0}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {0, 0}, }, + }, + }, + + { + .regex = "(a(b*)*|)*bc", + .input = "b!bc", + .count = 3, + .expected = { + { .pos = {2, 4}, }, + { .pos = {2, 2}, }, + { .pos = {NO_POS, NO_POS}, }, + }, + }, + { + .regex = "^(a(b*)*|)*bc$", + .input = "bc", + .count = 3, + .expected = { + { .pos = {0, 2}, }, + { .pos = {0, 0}, }, + { .pos = {NO_POS, NO_POS}, }, + }, + }, + { + .regex = "(|a((b*)*b*))*", + .input = "", + .count = 4, + .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {NO_POS, NO_POS}, }, + }, + }, + { + /* simplified version of the above */ + .regex = "^(|a(b*)*)*$", + .input = "", + .count = 3, + .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {NO_POS, NO_POS}, }, + }, + }, + { + /* zero repetitions should not set the capture */ + .regex = "^(a)*$", + .input = "", + .count = 2, + .expected = { + { .pos = {0, 0}, }, + { .pos = {NO_POS, NO_POS}, }, + }, + }, + { + .regex = "^(a)*(^)$", + .input = "", + .count = 3, + .expected = { + { .pos = {0, 0}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {0, 0}, }, + }, + }, + { + /* raw fuzzer output */ + .regex = "()((()(^|$|$^|^|$|$^^|$|$^|^|$|$^^^^|^|(|)($)|)+|^^|^|(|)($)|)+|)($)()+", + .input = "", + .count = 12, + .match = SHOULD_REJECT_AS_UNSUPPORTED, + .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos 
= {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "(^|())+()", + .input = "", + .count = 4, + .match = SHOULD_REJECT_AS_UNSUPPORTED, + .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "(?:(^|^$)+|)+", + .input = "", + .match = SHOULD_REJECT_AS_UNSUPPORTED, + .count = 2, + .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "^((|)($)|)+a$", + .input = "a", + .match = SHOULD_REJECT_AS_UNSUPPORTED, + .count = 4, + .expected = { + { .pos = {0, 1}, }, + { .pos = {0, 0}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {NO_POS, NO_POS}, }, + }, + }, + { + .regex = "^(($)|)+a$", + .input = "a", + .match = SHOULD_REJECT_AS_UNSUPPORTED, + .count = 3, + .expected = { + { .pos = {0, 1}, }, + { .pos = {0, 0}, }, + { .pos = {NO_POS, NO_POS}, }, + }, + }, + { + .regex = "^(|(|x))*$", + .input = "x", + .count = 3, + .expected = { + { .pos = {0, 1}, }, + { .pos = {1, 1}, }, + { .pos = {0, 1}, }, + }, + }, + { + /* same as the previous but without outer capture */ + .regex = "^(?:|(|x))*$", + .input = "x", + .count = 2, + .expected = { + { .pos = {0, 1}, }, + { .pos = {0, 1}, }, + }, + }, + { + .regex = "(((($)|)+|)a|)+", + .input = "", + .match = SHOULD_REJECT_AS_UNSUPPORTED, + .count = 5, + .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {NO_POS, NO_POS}, }, + }, + }, + + { + .regex = "^(|(|(|x)))*$", + .input = "x", + .count = 4, + .expected = { + { .pos = {0, 1}, }, + { .pos = {1, 1}, }, + { .pos = {0, 1}, }, + { .pos = {0, 1}, }, + }, + }, + + + { + .regex = "^(?:(?:(x?)^)y?)+$", + .input = "", + .count = 2, + .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + 
{ + .regex = "^(?:^())+$", + .input = "", + .count = 2, + .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "^(?:($|x))+$", + .input = "x", + + .match = SHOULD_REJECT_AS_UNSUPPORTED, + + .count = 2, + .expected = { + { .pos = {0, 1}, }, + { .pos = {1, 1}, }, + }, + }, + { + .regex = "^(($)|x)+$", + .input = "x", + .match = SHOULD_REJECT_AS_UNSUPPORTED, + .count = 3, + .expected = { + { .pos = {0, 1}, }, + { .pos = {1, 1}, }, + { .pos = {1, 1}, }, + }, + }, + { + .regex = "^(?:()?^()?)+$", + .input = "", + .count = 3, + .expected = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + }, + }, + { + .regex = "^(?:($|x)())+$", + .input = "x", + .match = SHOULD_REJECT_AS_UNSUPPORTED, + .count = 3, + .expected = { + { .pos = {0, 1}, }, + { .pos = {1, 1}, }, + { .pos = {1, 1}, }, + }, + }, + + { + .regex = "()~((|)($)|%)+", + .input = "~%", + .match = SHOULD_REJECT_AS_UNSUPPORTED, + .count = 5, + .expected = { + { .pos = {0, 2}, }, + { .pos = {0, 0}, }, + { .pos = {2, 2}, }, + { .pos = {2, 2}, }, + { .pos = {2, 2}, }, + }, + }, + + { + /* (slightly) reduced version of the previous */ + .regex = "^(()($)|x)+$", + .input = "x", + .match = SHOULD_REJECT_AS_UNSUPPORTED, + .count = 4, + .expected = { + { .pos = {0, 1}, }, + { .pos = {1, 1}, }, + { .pos = {1, 1}, }, + { .pos = {1, 1}, }, + }, + }, + + { + .regex = "a|_$[^b]", + .input = "a", + .count = 1, + .expected = { + { .pos = {0, 1}, }, + }, + }, + + { + .regex = "\\z", + .input = "", + .count = 1, + .match = SHOULD_REJECT_AS_UNSUPPORTED, + }, +}; + +const struct captest_case_multi multi_cases[] = { + { + .regex_count = 4, + .regexes = { + "^aa$", /* exactly two 'a's */ + "^a*", /* zero or more 'a's followed by anything */ + "^ab?$", /* 'a' and optionally 'b' */ + "a*$", /* anything ending in zero or more 'a's */ + }, + .inputs = { + { + .input = "", + .expected = { + { .regex = 0, .pos = POS_NONE }, + { .regex = 1, .pos = { 0, 0 } }, + { .regex = 2, .pos = POS_NONE }, + { 
.regex = 3, .pos = { 0, 0 } }, + }, + }, + + { + .input = "a", + .expected = { + { .regex = 0, .pos = POS_NONE }, + { .regex = 1, .pos = { 0, 1 } }, + { .regex = 2, .pos = { 0, 1 } }, + { .regex = 3, .pos = { 0, 1 } }, + }, + }, + + { + .input = "aa", + .expected = { + { .regex = 0, .pos = { 0, 2 } }, + { .regex = 1, .pos = { 0, 2 } }, + { .regex = 2, .pos = POS_NONE }, + { .regex = 3, .pos = { 0, 2 } }, + }, + }, + + { + .input = "aaa", + .expected = { + { .regex = 0, .pos = POS_NONE }, + { .regex = 1, .pos = { 0, 3 } }, + { .regex = 2, .pos = POS_NONE }, + { .regex = 3, .pos = { 0, 3 } }, + }, + }, + + { + .input = "ba", + .expected = { + { .regex = 0, .pos = POS_NONE }, + { .regex = 1, .pos = { 0, 0 } }, + { .regex = 2, .pos = POS_NONE }, + { .regex = 3, .pos = { 1, 2 } }, + }, + }, + + { + .input = "ab", + .expected = { + { .regex = 0, .pos = POS_NONE }, + { .regex = 1, .pos = { 0, 1 } }, + { .regex = 2, .pos = { 0, 2 } }, + { .regex = 3, .pos = { 2, 2 } }, + }, + }, + + { + .input = NULL, + }, + }, + }, + + { + .regex_count = 3, + .regexes = { + "a(b?)*c", + "(ab)(c)", + "ab+(c)", + }, + .inputs = { + { + .input = "", + .expected = { + { .regex = 0, .capture = 0, .pos = POS_NONE }, + { .regex = 0, .capture = 1, .pos = POS_NONE }, + { .regex = 1, .capture = 0, .pos = POS_NONE }, + { .regex = 1, .capture = 1, .pos = POS_NONE }, + { .regex = 1, .capture = 2, .pos = POS_NONE }, + { .regex = 2, .capture = 0, .pos = POS_NONE }, + { .regex = 2, .capture = 1, .pos = POS_NONE }, + }, + }, + { + .input = "abc", + .expected = { + { .regex = 0, .capture = 0, .pos = {0, 3} }, + { .regex = 0, .capture = 1, .pos = {2, 2} }, + { .regex = 1, .capture = 0, .pos = {0, 3} }, + { .regex = 1, .capture = 1, .pos = {0, 2} }, + { .regex = 1, .capture = 2, .pos = {2, 3} }, + { .regex = 2, .capture = 0, .pos = {0, 3} }, + { .regex = 2, .capture = 1, .pos = {2, 3} }, + }, + }, + }, + }, + { + /* fuzzer regression: This led to an execution path in fsm_union_array, + * fsm_union, 
fsm_merge, merge that did not init or otherwise set the + * `struct fsm_combine_info`, leading to an out of range offset for + * the capture base. */ + .regex_count = 3, + .regexes = { + ".", + ".^", + "^^_", + }, + .inputs = { + { + .input = "", + .expected = { + { .regex = 0, .pos = POS_NONE }, + { .regex = 1, .pos = POS_NONE }, + { .regex = 2, .pos = POS_NONE }, + }, + }, + { + .input = "_", + .expected = { + { .regex = 0, .pos = { 0, 1 } }, + { .regex = 1, .pos = { 0, 1 } }, + { .regex = 2, .pos = { 0, 1 } }, + }, + }, + }, + }, + + { + /* This checks that minimisation doesn't incorrectly + * merge these and lead to capture false positives. */ + .regex_count = 2, + .regexes = { + "^a(b)c$", /* exactly one 'b' */ + "^a(b*)c$", /* any number of 'b's */ + }, + .inputs = { + { + .input = "", + .expected = { + { .regex = 0, .capture = 0, .pos = POS_NONE }, + { .regex = 0, .capture = 1, .pos = POS_NONE }, + { .regex = 1, .capture = 0, .pos = POS_NONE }, + { .regex = 1, .capture = 1, .pos = POS_NONE }, + }, + }, + { + .input = "a", + .expected = { + { .regex = 0, .capture = 0, .pos = POS_NONE }, + { .regex = 0, .capture = 1, .pos = POS_NONE }, + { .regex = 1, .capture = 0, .pos = POS_NONE }, + { .regex = 1, .capture = 1, .pos = POS_NONE }, + }, + }, + { + .input = "ab", + .expected = { + { .regex = 0, .capture = 0, .pos = POS_NONE }, + { .regex = 0, .capture = 1, .pos = POS_NONE }, + { .regex = 1, .capture = 0, .pos = POS_NONE }, + { .regex = 1, .capture = 1, .pos = POS_NONE }, + }, + }, + { + .input = "ac", + .expected = { + { .regex = 0, .capture = 0, .pos = POS_NONE }, + { .regex = 0, .capture = 1, .pos = POS_NONE }, + { .regex = 1, .capture = 0, .pos = { 0, 2 } }, + { .regex = 1, .capture = 1, .pos = { 1, 1 } }, + }, + }, + { + .input = "abc", + .expected = { + { .regex = 0, .capture = 0, .pos = {0, 3 } }, + { .regex = 0, .capture = 1, .pos = {1, 2 } }, + { .regex = 1, .capture = 0, .pos = { 0, 3 } }, + { .regex = 1, .capture = 1, .pos = { 1, 2 } }, + }, + }, + { 
+ .input = "abbc", + .expected = { + { .regex = 0, .capture = 0, .pos = POS_NONE }, + { .regex = 0, .capture = 1, .pos = POS_NONE }, + { .regex = 1, .capture = 0, .pos = { 0, 4 } }, + { .regex = 1, .capture = 1, .pos = { 1, 3 } }, + }, + }, + + { + .input = NULL, + }, + }, + } +}; + + +static struct captest_case_program program_cases[] = { + { + .input = "", + .char_class = { + { .octets = { ~0, ~0, ~0, ~0 }}, /* 0x00 <= x <= 0xff */ + }, + .expected = { + .count = 4, + .captures = { + { .pos = {0, 0}, }, + { .pos = {0, 0}, }, + { .pos = {NO_POS, NO_POS}, }, + { .pos = {0, 0}, }, + }, + }, + + .ops = { + { .t = CAPVM_OP_SPLIT, .u.split = { .greedy = 3, .nongreedy = 1 }}, + { .t = CAPVM_OP_CHARCLASS, .u.charclass_id = 0 }, + { .t = CAPVM_OP_JMP, .u.jmp = 0 }, + { .t = CAPVM_OP_SAVE, .u.save = 0 }, + { .t = CAPVM_OP_SPLIT, .u.split = { .greedy = 5, .nongreedy = 7 }}, + { .t = CAPVM_OP_ANCHOR, .u.anchor = CAPVM_ANCHOR_START }, + + { .t = CAPVM_OP_JMP, .u.jmp = 9 }, /* jump after |() */ + { .t = CAPVM_OP_SAVE, .u.save = 4 }, + { .t = CAPVM_OP_SAVE, .u.save = 5 }, + + { .t = CAPVM_OP_SPLIT, .u.split = { .greedy = 4, .nongreedy = 10 }}, + + { .t = CAPVM_OP_SAVE, .u.save = 2 }, + { .t = CAPVM_OP_SAVE, .u.save = 3 }, + { .t = CAPVM_OP_SAVE, .u.save = 6 }, + { .t = CAPVM_OP_SAVE, .u.save = 7 }, + { .t = CAPVM_OP_SAVE, .u.save = 1 }, + { .t = CAPVM_OP_SPLIT, .u.split = { .greedy = 18, .nongreedy = 16 }}, + { .t = CAPVM_OP_CHARCLASS, .u.charclass_id = 0 }, + { .t = CAPVM_OP_JMP, .u.jmp = 15 }, + { .t = CAPVM_OP_MATCH }, + }, + }, + + + { + /* correcting compilation of '^(?:($|x))+$' */ + .input = "x", + .expected = { + .count = 2, + .captures = { + { .pos = {0, 1}, }, + { .pos = {1, 1}, }, + }, + }, + + .ops = { + [0] = { .t = CAPVM_OP_SAVE, .u.save = 0 }, + [1] = { .t = CAPVM_OP_ANCHOR, .u.anchor = CAPVM_ANCHOR_START }, + [2] = { .t = CAPVM_OP_SAVE, .u.save = 2 }, + [3] = { .t = CAPVM_OP_SPLIT, .u.split = { .greedy = 4, .nongreedy = 6 }}, + [4] = { .t = CAPVM_OP_ANCHOR, 
.u.anchor = CAPVM_ANCHOR_END }, + + /* [5] = { .t = CAPVM_OP_JMP, .u.jmp = 7 }, */ + [5] = { .t = CAPVM_OP_SPLIT, .u.split = { .greedy = 7, .nongreedy = 9 }}, + + [6] = { .t = CAPVM_OP_CHAR, .u.chr = 'x' }, + [7] = { .t = CAPVM_OP_SAVE, .u.save = 3 }, + [8] = { .t = CAPVM_OP_SPLIT, .u.split = { .greedy = 2, .nongreedy = 9 }}, + [9] = { .t = CAPVM_OP_ANCHOR, .u.anchor = CAPVM_ANCHOR_END }, + [10] = { .t = CAPVM_OP_SAVE, .u.save = 1 }, + [11] = { .t = CAPVM_OP_MATCH }, + }, + }, +}; + +#define NO_FILTER ((size_t)-1) +struct options { + size_t filter; + int verbosity; + bool track_timing; + FILE *prog_output; + enum groups { + GROUP_SINGLE = 0x01, + GROUP_MULTI = 0x02, + GROUP_PROGRAMS = 0x04, + GROUP_ALL = 0xff, + } group; +}; + +static void +print_usage(FILE *f, const char *progname) +{ + fprintf(f, "%s: [-h] [-v] [-s | -m | -p] [-f ] [-t]\n", progname); + fprintf(f, " -h: print this usage info\n"); + fprintf(f, " -v: increase verbosity (can repeat: -vvv)\n"); + fprintf(f, " -f : just run a specific test, by numeric ID\n"); + fprintf(f, " -s: only single casse\n"); + fprintf(f, " -m: only multi cases\n"); + fprintf(f, " -p: only program cases\n"); + fprintf(f, " -t: print timing info\n"); +} + +static void +get_options(struct options *opt, int argc, char **argv) +{ + const char *progname = argv[0]; + int c; + while (c = getopt(argc, argv, "hf:mpstv"), c != -1) { + switch (c) { + case 'h': + print_usage(stdout, progname); + exit(EXIT_SUCCESS); + break; + case 'v': + opt->verbosity++; + break; + case 'f': + opt->filter = atol(optarg); + break; + case 't': + opt->track_timing = true; + break; + case 'p': + opt->group = GROUP_PROGRAMS; + break; + case 's': + opt->group = GROUP_SINGLE; + break; + case 'm': + opt->group = GROUP_MULTI; + break; + case '?': + default: + print_usage(stderr, progname); + exit(EXIT_FAILURE); + } + } +} + +int main(int argc, char **argv) { + size_t pass = 0; + size_t fail = 0; + size_t skip = 0; + size_t nth = 0; + + struct options options = { 
+ .filter = NO_FILTER, + .verbosity = 0, + .group = GROUP_ALL, + }; + get_options(&options, argc, argv); + + if (options.verbosity == DUMP_PROGRAMS_VERBOSITY) { + options.prog_output = fopen("prog_output", "w"); + assert(options.prog_output != NULL); + } + + /* avoid an extra layer of indentation here */ + if (!(options.group & GROUP_SINGLE)) { goto after_single; } + + printf("-- single cases without trailing newline\n"); + const size_t single_case_count = sizeof(single_cases)/sizeof(single_cases[0]); + for (size_t c_i = 0; c_i < single_case_count; c_i++) { + const size_t cur = nth++; + if (options.filter != NO_FILTER && options.filter != cur) { + continue; + } + + if (options.verbosity > 0) { + printf("%zu: ", cur); + if (options.verbosity > 2) { + fflush(stdout); + } + } + + if (options.verbosity == DUMP_PROGRAMS_VERBOSITY) { + fprintf(options.prog_output, "\n\n==== test_case %zu\n", c_i); + } + + const struct captest_case_single *t = &single_cases[c_i]; + + if (t->match == SHOULD_SKIP) { + printf("%zd: SKIP (regex \"%s\", input \"%s\")\n", + cur, t->regex, t->input); + skip++; + continue; + } + + enum captest_run_case_res res = captest_run_case(t, options.verbosity, false, options.prog_output); + + switch (res) { + case CAPTEST_RUN_CASE_PASS: + pass++; + break; + case CAPTEST_RUN_CASE_FAIL: + if (options.verbosity == 0) { + printf("-- test case %zd (regex \"%s\", input \"%s\")\n", cur, t->regex, t->input); + } + fail++; + break; + case CAPTEST_RUN_CASE_ERROR: + assert(!"error"); + return EXIT_FAILURE; + } + } + + /* second pass, adding a trailing newline to input */ + printf("-- single cases with trailing newline\n"); + for (size_t c_i = 0; c_i < single_case_count; c_i++) { + const size_t cur = nth++; + if (options.filter != NO_FILTER && options.filter != cur) { + continue; + } + + const struct captest_case_single *t = &single_cases[c_i]; + if (t->no_nl) { continue; } + if (t->match == SHOULD_SKIP) { + printf("%zd: SKIP (regex \"%s\", input \"%s\\n\")\n", + cur, 
t->regex, t->input); + skip++; + continue; + } + + if (options.verbosity > 0) { + printf("%zu: ", cur); + if (options.verbosity > 2) { + fflush(stdout); + } + } + + enum captest_run_case_res res = captest_run_case(t, options.verbosity, true, options.prog_output); + + switch (res) { + case CAPTEST_RUN_CASE_PASS: + pass++; + break; + case CAPTEST_RUN_CASE_FAIL: + if (options.verbosity == 0) { + printf("-- test case %zd (regex \"%s\", input \"%s\\n\")\n", cur, t->regex, t->input); + } + fail++; + break; + case CAPTEST_RUN_CASE_ERROR: + assert(!"error"); + return EXIT_FAILURE; + } + } +after_single: + + /* multi-regex tests */ + if (!(options.group & GROUP_MULTI)) { goto after_multi; } + + printf("-- multi-regex cases\n"); + const size_t multi_case_count = sizeof(multi_cases)/sizeof(multi_cases[0]); + for (size_t c_i = 0; c_i < multi_case_count; c_i++) { + const size_t cur = nth++; + if ((options.filter != NO_FILTER && options.filter != cur)) { + continue; + } + + const struct captest_case_multi *t = &multi_cases[c_i]; + if (t->match == SHOULD_SKIP) { + printf("%zu: SKIP (multi)\n", c_i); + skip++; + continue; + } + + if (options.verbosity > 0) { + printf("%zu: ", cur); + } + + struct captest_case_multi_result result; + enum captest_run_case_res res = captest_run_case_multi(t, + options.verbosity, false, options.prog_output, &result); + + pass += result.pass; + fail += result.fail; + + switch (res) { + case CAPTEST_RUN_CASE_PASS: + if (options.verbosity > 0) { + printf("pass\n"); + } + break; + case CAPTEST_RUN_CASE_FAIL: + if (options.verbosity > 0) { + printf("FAIL\n"); + } else { + printf("-- test case %zd\n", cur); + } + break; + case CAPTEST_RUN_CASE_ERROR: + assert(!"error"); + return EXIT_FAILURE; + } + } +after_multi: + + /* hardcoded programs */ + if (!(options.group & GROUP_PROGRAMS)) { goto after_programs; } + + const size_t prog_case_count = sizeof(program_cases)/sizeof(program_cases[0]); + for (size_t c_i = 0; c_i < prog_case_count; c_i++) { + const size_t 
cur = nth++; + if ((options.filter != NO_FILTER && options.filter != cur)) { + continue; + } + + const struct captest_case_program *t = &program_cases[c_i]; + + if (options.verbosity > 0) { + printf("%zu: ", cur); + } + + enum captest_run_case_res res = captest_run_case_program(t, + options.verbosity); + + switch (res) { + case CAPTEST_RUN_CASE_PASS: + if (options.verbosity > 0) { + printf("pass\n"); + } + pass++; + break; + case CAPTEST_RUN_CASE_FAIL: + fail++; + if (options.verbosity > 0) { + printf("FAIL\n"); + } else if (options.verbosity == 0) { + printf("-- test case %zd\n", cur); + } + break; + case CAPTEST_RUN_CASE_ERROR: + assert(!"error"); + return EXIT_FAILURE; + } + } +after_programs: + + printf("-- pass %zu, fail %zu, skip %zu\n", pass, fail, skip); + + return fail > 0 + ? EXIT_FAILURE + : EXIT_SUCCESS; +} diff --git a/tests/capture/capture_union1.c b/tests/capture/capture_union1.c deleted file mode 100644 index 5d9bd2920..000000000 --- a/tests/capture/capture_union1.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright 2020 Scott Vokes - * - * See LICENCE for the full copyright terms. 
- */ - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "captest.h" - -/* union /(ab)/ and /(cde)/ */ - -static struct fsm * -build(unsigned *cb_a, unsigned *cb_b); - -static void -check(const struct fsm *fsm, const char *input, - unsigned end_id, unsigned exp_capture_id, - size_t exp_start, size_t exp_end); - -int main(void) { - unsigned cb_ab, cb_cde; /* capture base */ - struct fsm *abcde = build(&cb_ab, &cb_cde); - - check(abcde, "ab", 0, cb_ab, 0, 2); - check(abcde, "cde", 1, cb_cde, 0, 3); - - fsm_free(abcde); - - return EXIT_SUCCESS; -} - -static struct fsm * -build(unsigned *cb_a, unsigned *cb_b) -{ - struct fsm *ab = captest_fsm_of_string("ab", 0); - struct fsm *cde = captest_fsm_of_string("cde", 1); - struct fsm *abcde; - struct fsm_combine_info ci; - size_t cc_ab, cc_cde, cc_abcde; - - assert(ab); - assert(cde); - - if (!fsm_capture_set_path(ab, 0, 0, 2)) { - assert(!"path 0"); - } - if (!fsm_capture_set_path(cde, 0, 0, 3)) { - assert(!"path 1"); - } - - cc_ab = fsm_countcaptures(ab); - assert(cc_ab == 1); - - cc_cde = fsm_countcaptures(cde); - assert(cc_cde == 1); - - abcde = fsm_union(ab, cde, &ci); - assert(abcde); - *cb_a = ci.capture_base_a; - *cb_b = ci.capture_base_b; - - cc_abcde = fsm_countcaptures(abcde); - assert(cc_abcde == cc_ab + cc_cde); - -#if LOG_INTERMEDIATE_FSMS - fprintf(stderr, "==== after union: cb_ab %u, cb_cde %u\n", - *cb_a, *cb_b); - fsm_print_fsm(stderr, abcde); - - fsm_capture_dump(stderr, "#### after union", abcde); - - fprintf(stderr, "==== determinise\n"); -#endif - - if (!fsm_determinise(abcde)) { - assert(!"determinise"); - } - -#if LOG_INTERMEDIATE_FSMS - fprintf(stderr, "==== after determinise\n"); - fsm_print_fsm(stderr, abcde); - - assert(fsm_countcaptures(abcde) == cc_abcde); - - fsm_capture_dump(stderr, "#### after det", abcde); -#endif - - assert(fsm_countcaptures(abcde) == cc_abcde); - return abcde; -} - -static void -check(const struct fsm *fsm, const char *input, - 
unsigned end_id, unsigned exp_capture_id, - size_t exp_start, size_t exp_end) -{ - struct captest_input ci; - fsm_state_t end; - int exec_res; - struct fsm_capture got_captures[MAX_TEST_CAPTURES]; - - ci.string = input; - ci.pos = 0; - - exec_res = fsm_exec(fsm, captest_getc, &ci, &end, got_captures); - if (exec_res != 1) { - fprintf(stderr, "exec_res: %d\n", exec_res); - exit(EXIT_FAILURE); - } - - { - const char *msg; - if (!captest_check_single_end_id(fsm, end, end_id, &msg)) { - fprintf(stderr, "%s\n", msg); - exit(EXIT_FAILURE); - } - } - - if (got_captures[exp_capture_id].pos[0] != exp_start) { - fprintf(stderr, "capture[%u].pos[0]: exp %lu, got %lu\n", - exp_capture_id, exp_start, - got_captures[exp_capture_id].pos[0]); - exit(EXIT_FAILURE); - } - if (got_captures[exp_capture_id].pos[1] != exp_end) { - fprintf(stderr, "capture[%u].pos[1]: exp %lu, got %lu\n", - exp_capture_id, exp_end, - got_captures[exp_capture_id].pos[1]); - exit(EXIT_FAILURE); - } -} diff --git a/tests/capture/capture_union2.c b/tests/capture/capture_union2.c deleted file mode 100644 index 7fab2f18d..000000000 --- a/tests/capture/capture_union2.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2020 Scott Vokes - * - * See LICENCE for the full copyright terms. 
- */ - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "captest.h" - -/* union /(abcd)/ and /(abed)/ */ - -static struct fsm * -build(unsigned *cb_a, unsigned *cb_b); - -static void -check(const struct fsm *fsm, const char *input, - unsigned end_id, unsigned exp_capture_id, - size_t exp_start, size_t exp_end); - -int main(void) { - unsigned cb_abcd, cb_abed; - struct fsm *fsm = build(&cb_abcd, &cb_abed); - - check(fsm, "abcd", 0, cb_abcd, 0, 4); - check(fsm, "abed", 1, cb_abed, 0, 4); - - fsm_free(fsm); - - return EXIT_SUCCESS; -} - -static struct fsm * -build(unsigned *cb_a, unsigned *cb_b) -{ - struct fsm *abcd = captest_fsm_of_string("abcd", 0); - struct fsm *abed = captest_fsm_of_string("abed", 1); - struct fsm *res; - - assert(abcd); - assert(abed); - - if (!fsm_capture_set_path(abcd, 0, 0, 4)) { - assert(!"path 0"); - } - if (!fsm_capture_set_path(abed, 0, 0, 4)) { - assert(!"path 1"); - } - - { - struct fsm *fsms[2]; - struct fsm_combined_base_pair bases[2]; - fsms[0] = abcd; - fsms[1] = abed; - res = fsm_union_array(2, fsms, bases); - assert(res); - *cb_a = bases[0].capture; - *cb_b = bases[1].capture; - } - - if (!fsm_determinise(res)) { - assert(!"determinise"); - } - - assert(fsm_countcaptures(res) == 2); - - return res; -} - -static void -check(const struct fsm *fsm, const char *input, - unsigned end_id, unsigned exp_capture_id, - size_t exp_start, size_t exp_end) -{ - struct captest_input ci; - fsm_state_t end; - int exec_res; - struct fsm_capture got_captures[MAX_TEST_CAPTURES]; - - ci.string = input; - ci.pos = 0; - - exec_res = fsm_exec(fsm, captest_getc, &ci, &end, got_captures); - if (exec_res != 1) { - fprintf(stderr, "exec_res: %d\n", exec_res); - exit(EXIT_FAILURE); - } - - { - const char *msg; - if (!captest_check_single_end_id(fsm, end, end_id, &msg)) { - fprintf(stderr, "%s\n", msg); - exit(EXIT_FAILURE); - } - } - - if (got_captures[exp_capture_id].pos[0] != exp_start) { - fprintf(stderr, 
"capture[%u].pos[0]: exp %lu, got %lu\n", - exp_capture_id, exp_start, - got_captures[exp_capture_id].pos[0]); - exit(EXIT_FAILURE); - } - if (got_captures[exp_capture_id].pos[1] != exp_end) { - fprintf(stderr, "capture[%u].pos[1]: exp %lu, got %lu\n", - exp_capture_id, exp_end, - got_captures[exp_capture_id].pos[1]); - exit(EXIT_FAILURE); - } -} diff --git a/tests/endids/endids2_union_many_endids.c b/tests/endids/endids2_union_many_endids.c index 8e39ca93d..47af96dc9 100644 --- a/tests/endids/endids2_union_many_endids.c +++ b/tests/endids/endids2_union_many_endids.c @@ -167,6 +167,7 @@ int main(void) if (fsm == NULL) { fsm = new; } else { + /* TODO: this could use fsm_union_array instead */ fsm = fsm_union(fsm, new, NULL); assert(fsm != NULL); } @@ -283,5 +284,3 @@ int main(void) return EXIT_SUCCESS; } - - diff --git a/tests/endids/utils.c b/tests/endids/utils.c index 79777e825..85ff7a660 100644 --- a/tests/endids/utils.c +++ b/tests/endids/utils.c @@ -9,7 +9,7 @@ match_string(const struct fsm *fsm, const char *s, fsm_state_t *end_ptr, fsm_end fsm_state_t end = 0; int ret; - ret = fsm_exec(fsm, fsm_sgetc, &s, &end, NULL); + ret = fsm_exec(fsm, fsm_sgetc, &s, &end); if (ret == 1) { size_t num_endids; diff --git a/tests/idmap/Makefile b/tests/idmap/Makefile new file mode 100644 index 000000000..aee01f565 --- /dev/null +++ b/tests/idmap/Makefile @@ -0,0 +1,19 @@ +.include "../../share/mk/top.mk" + +TEST.tests/idmap != ls -1 tests/idmap/idmap*.c +TEST_SRCDIR.tests/idmap = tests/idmap +TEST_OUTDIR.tests/idmap = ${BUILD}/tests/idmap + +.for n in ${TEST.tests/idmap:T:R:C/^idmap//} +INCDIR.${TEST_SRCDIR.tests/idmap}/idmap${n}.c += src/adt +.endfor + +.for n in ${TEST.tests/idmap:T:R:C/^idmap//} +test:: ${TEST_OUTDIR.tests/idmap}/res${n} +SRC += ${TEST_SRCDIR.tests/idmap}/idmap${n}.c +CFLAGS.${TEST_SRCDIR.tests/idmap}/idmap${n}.c += -UNDEBUG -D_DEFAULT_SOURCE -std=c99 +${TEST_OUTDIR.tests/idmap}/run${n}: ${TEST_OUTDIR.tests/idmap}/idmap${n}.o ${BUILD}/lib/adt.o + ${CC} 
${CFLAGS} ${CFLAGS.${TEST_SRCDIR.tests/idmap}/idmap${n}.c} -o ${TEST_OUTDIR.tests/idmap}/run${n} ${TEST_OUTDIR.tests/idmap}/idmap${n}.o ${BUILD}/lib/adt.o +${TEST_OUTDIR.tests/idmap}/res${n}: ${TEST_OUTDIR.tests/idmap}/run${n} + ( ${TEST_OUTDIR.tests/idmap}/run${n} 1>&2 && echo PASS || echo FAIL ) > ${TEST_OUTDIR.tests/idmap}/res${n} +.endfor diff --git a/tests/idmap/idmap_basic.c b/tests/idmap/idmap_basic.c new file mode 100644 index 000000000..c7a18856b --- /dev/null +++ b/tests/idmap/idmap_basic.c @@ -0,0 +1,137 @@ +/* + * Copyright 2021 Scott Vokes + * + * See LICENCE for the full copyright terms. + */ + +#include +#include +#include + +#include + +#define DEF_LIMIT 10 +#define DEF_SEED 0 + +/* Thes numbers were chose to get a reasonable variety, + * but also some duplicated values as the input grows. */ +#define MAX_GEN_VALUES 23 +#define ID_MASK ((1 << 9) - 1) +#define VALUE_MASK ((1 << 10) - 1) + +static int +dump_cb(fsm_state_t state_id, unsigned value, void *opaque) +{ + /* fprintf(stderr, " -- state %d, value %u\n", state_id, value); */ + assert(state_id <= ID_MASK); + assert(value <= VALUE_MASK); + (void)opaque; + return 1; +} + +static int +cmp_u(const void *pa, const void *pb) +{ + const unsigned a = *(unsigned *)pa; + const unsigned b = *(unsigned *)pb; + return a < b ? -1 : a > b ? 1 : 0; +} + +int main(int argc, char **argv) { + const size_t limit = (argc > 1 ? atoi(argv[1]) : DEF_LIMIT); + const unsigned seed = (argc > 2 ? 
atoi(argv[2]) : DEF_SEED); + + (void)argc; + (void)argv; + struct idmap *m = idmap_new(NULL); + + srandom(seed); + + /* Fill the table with random data */ + for (size_t id_i = 0; id_i < limit; id_i++) { + const fsm_state_t id = (fsm_state_t)(random() & ID_MASK); + const size_t value_count = random() % MAX_GEN_VALUES; + + for (size_t v_i = 0; v_i < value_count; v_i++) { + const unsigned v = random() & VALUE_MASK; + if (!idmap_set(m, id, v)) { + assert(!"failed to set"); + } + } + } + + idmap_iter(m, dump_cb, NULL); + + srandom(seed); + + size_t got_buf_ceil = MAX_GEN_VALUES; + unsigned *got_buf = malloc(got_buf_ceil * sizeof(got_buf[0])); + assert(got_buf != NULL); + + /* Reset the PRNG and read back the same data. */ + for (size_t id_i = 0; id_i < limit; id_i++) { + const fsm_state_t id = (fsm_state_t)(random() & ID_MASK); + const size_t generated_value_count = random() % MAX_GEN_VALUES; + + /* Note: This can occasionally differ from + * generated_value_count, because the same id or values + * may have been generated more than once. As long as + * all the values match, it's fine. */ + const size_t value_count = idmap_get_value_count(m, id); + + if (value_count > got_buf_ceil) { + size_t nceil = got_buf_ceil; + while (nceil <= value_count) { + nceil *= 2; + } + free(got_buf); + got_buf = malloc(nceil * sizeof(got_buf[0])); + assert(got_buf != NULL); + got_buf_ceil = nceil; + } + + size_t written; + if (!idmap_get(m, id, + got_buf_ceil * sizeof(got_buf[0]), got_buf, + &written)) { + assert(!"failed to get"); + } + assert(written == value_count); + + unsigned gen_buf[MAX_GEN_VALUES]; + + for (size_t v_i = 0; v_i < generated_value_count; v_i++) { + const unsigned v = random() & VALUE_MASK; + gen_buf[v_i] = v; + } + qsort(gen_buf, generated_value_count, sizeof(gen_buf[0]), cmp_u); + + /* Every generated value should appear in the buffer. + * There may be more in the buffer; ignore them. 
*/ + size_t v_i = 0; + for (size_t gen_i = 0; gen_i < generated_value_count; gen_i++) { + int found = 0; + const unsigned gv = gen_buf[gen_i]; + assert(value_count <= got_buf_ceil); + /* got_buf should be sorted, so we can pick up where we left off */ + while (v_i < value_count) { + if (gv == got_buf[v_i]) { + /* Intentionally don't increment v_i on match, + * because gen_buf can repeat values. */ + found = 1; + break; + } + v_i++; + } + if (!found) { + fprintf(stderr, "NOT FOUND: state %d -- value: %u\n", + id, gv); + return EXIT_FAILURE; + } + } + } + + free(got_buf); + idmap_free(m); + return EXIT_SUCCESS; +} diff --git a/tests/ir/Makefile b/tests/ir/Makefile index 0009c45ec..566d1add8 100755 --- a/tests/ir/Makefile +++ b/tests/ir/Makefile @@ -9,7 +9,7 @@ RE=${BUILD}/bin/re .for n in ${TEST.tests/ir:T:Mout*.json:R:C/^out//} ${TEST_OUTDIR.tests/ir}/got${n}.json: ${TEST_SRCDIR.tests/ir}/in${n}.re - ${RE} -pl irjson -y ${.ALLSRC:M*.re} \ + ${RE} -FC -pl irjson -y ${.ALLSRC:M*.re} \ > $@ ${TEST_OUTDIR.tests/ir}/res${n}: \ diff --git a/tests/minimise/minimise_test_case_list.c b/tests/minimise/minimise_test_case_list.c index 1386f0dcc..7299d2fa1 100644 --- a/tests/minimise/minimise_test_case_list.c +++ b/tests/minimise/minimise_test_case_list.c @@ -22,7 +22,6 @@ const char *test_cases[] = { "(?:a+|b)a+", "(?:a*ba)+", "(?:a|cd)+e?x", - "-> 1 'a';", "(?:abc|def)+", "(?:abc|def)*", "(?:b|a*)", @@ -81,7 +80,7 @@ check_minimisation(const char *pattern) .offset = 0 }; - fsm = re_comp(RE_PCRE, scanner_next, &s, &opt, RE_MULTI, &err); + fsm = re_comp(RE_PCRE, scanner_next, &s, &opt, RE_MULTI | RE_NOCAPTURE, &err); assert(fsm != NULL); if (!fsm_determinise(fsm)) { return 0; diff --git a/tests/native/Makefile b/tests/native/Makefile index 8712e1588..fbca0ca69 100755 --- a/tests/native/Makefile +++ b/tests/native/Makefile @@ -9,11 +9,11 @@ RE=${BUILD}/bin/re .for n in ${TEST.tests/native:T:Mout*.fsm:R:C/^out//} ${TEST_OUTDIR.tests/native}/got${n}.fsm: 
${TEST_SRCDIR.tests/native}/in${n}.re - ${RE} -r native -py ${.ALLSRC:M*.re} \ + ${RE} -FC -r native -py ${.ALLSRC:M*.re} \ > $@ ${TEST_OUTDIR.tests/native}/nfa${n}.fsm: ${TEST_SRCDIR.tests/native}/in${n}.re - ${RE} -r native -n -py ${.ALLSRC:M*.re} \ + ${RE} -FC -r native -n -py ${.ALLSRC:M*.re} \ > $@ ${TEST_OUTDIR.tests/native}/res${n}: \ @@ -27,7 +27,7 @@ FSMTEST_RESULT += ${TEST_OUTDIR.tests/native}/res${n} .for n in ${TEST.tests/native:T:Mout*.err:R:C/^out//} ${TEST_OUTDIR.tests/native}/got${n}.err: ${TEST_SRCDIR.tests/native}/in${n}.re - ${RE} -r native -py ${.ALLSRC:M*.re} \ + ${RE} -FC -r native -py ${.ALLSRC:M*.re} \ 2> $@; [ $$? -ne 0 ] ${TEST_OUTDIR.tests/native}/res${n}: \ diff --git a/tests/pcre-anchor/Makefile b/tests/pcre-anchor/Makefile index bb9954554..1dc4a77bc 100644 --- a/tests/pcre-anchor/Makefile +++ b/tests/pcre-anchor/Makefile @@ -9,11 +9,11 @@ RE=${BUILD}/bin/re .for n in ${TEST.tests/pcre-anchor:T:Mout*.fsm:R:C/^out//} ${TEST_OUTDIR.tests/pcre-anchor}/got${n}.fsm: ${TEST_SRCDIR.tests/pcre-anchor}/in${n}.re - ${RE} -r pcre -py ${.ALLSRC:M*.re} \ + ${RE} -FC -r pcre -py ${.ALLSRC:M*.re} \ > $@ ${TEST_OUTDIR.tests/pcre-anchor}/nfa${n}.fsm: ${TEST_SRCDIR.tests/pcre-anchor}/in${n}.re - ${RE} -r pcre -n -py ${.ALLSRC:M*.re} \ + ${RE} -FC -r pcre -n -py ${.ALLSRC:M*.re} \ > $@ ${TEST_OUTDIR.tests/pcre-anchor}/res${n}: \ diff --git a/tests/pcre-anchor/in81.re b/tests/pcre-anchor/in81.re new file mode 100644 index 000000000..8b5fad7c3 --- /dev/null +++ b/tests/pcre-anchor/in81.re @@ -0,0 +1 @@ +($x)* \ No newline at end of file diff --git a/tests/pcre-anchor/out81.fsm b/tests/pcre-anchor/out81.fsm new file mode 100644 index 000000000..2cdc2f023 --- /dev/null +++ b/tests/pcre-anchor/out81.fsm @@ -0,0 +1,5 @@ +0 -> 0 ?; +0 -> 1 "\n"; + +start: 0; +end: 0, 1; \ No newline at end of file diff --git a/tests/pcre-classes/Makefile b/tests/pcre-classes/Makefile index 0d9809d76..0d459e256 100755 --- a/tests/pcre-classes/Makefile +++ 
b/tests/pcre-classes/Makefile @@ -16,7 +16,7 @@ RE=${BUILD}/bin/re FSM=${BUILD}/bin/fsm ${TEST_OUTDIR.tests/pcre-classes}/dot-all.fsm: - ${RE} -r pcre -p '^[\x00-\xff]$$' | ${FSM} -pm \ + ${RE} -FC -r pcre -p '^[\x00-\xff]$$' | ${FSM} -pm \ > $@ # compl.re tests @@ -32,7 +32,7 @@ ${TEST_OUTDIR.tests/pcre-classes}/dot-all.fsm: .for n in ${TEST.tests/pcre-classes:M*/compl*.re:T:R:C/^compl//} ${TEST_OUTDIR.tests/pcre-classes}/got${n}.fsm: ${TEST_SRCDIR.tests/pcre-classes}/in${n}.re - ${RE} -r pcre -py ${.ALLSRC:M*/in*.re} | ${FSM} -pm \ + ${RE} -FC -r pcre -py ${.ALLSRC:M*/in*.re} | ${FSM} -pm \ > $@ ${TEST_OUTDIR.tests/pcre-classes}/got-compl${n}.fsm: ${TEST_OUTDIR.tests/pcre-classes}/got${n}.fsm ${TEST_OUTDIR.tests/pcre-classes}/dot-all.fsm @@ -40,7 +40,7 @@ ${TEST_OUTDIR.tests/pcre-classes}/got-compl${n}.fsm: ${TEST_OUTDIR.tests/pcre-cl > $@ ${TEST_OUTDIR.tests/pcre-classes}/expect-compl${n}.fsm: ${TEST_SRCDIR.tests/pcre-classes}/compl${n}.re - ${RE} -r pcre -py ${.ALLSRC:M*/compl*.re} | ${FSM} -pm \ + ${RE} -FC -r pcre -py ${.ALLSRC:M*/compl*.re} | ${FSM} -pm \ > $@ ${TEST_OUTDIR.tests/pcre-classes}/res${n}: \ @@ -66,11 +66,11 @@ FSMTEST_RESULT += ${TEST_OUTDIR.tests/pcre-classes}/res${n} @echo x: ${n} ${TEST_OUTDIR.tests/pcre-classes}/got${n}.fsm: ${TEST_SRCDIR.tests/pcre-classes}/in${n}.re - ${RE} -r pcre -py ${.ALLSRC:M*/in*.re} | ${FSM} -pm \ + ${RE} -FC -r pcre -py ${.ALLSRC:M*/in*.re} | ${FSM} -pm \ > $@ ${TEST_OUTDIR.tests/pcre-classes}/out${n}.fsm: ${TEST_SRCDIR.tests/pcre-classes}/equal${n}.re - ${RE} -r pcre -py ${.ALLSRC:M*/equal*.re} | ${FSM} -pm \ + ${RE} -FC -r pcre -py ${.ALLSRC:M*/equal*.re} | ${FSM} -pm \ > $@ ${TEST_OUTDIR.tests/pcre-classes}/res${n}: \ diff --git a/tests/pcre-flags/Makefile b/tests/pcre-flags/Makefile index 67e70fbbb..308571395 100755 --- a/tests/pcre-flags/Makefile +++ b/tests/pcre-flags/Makefile @@ -13,17 +13,17 @@ RE=${BUILD}/bin/re TEST_OUTDIR.tests/pcre-flags/mode${n} != cat ${TEST_SRCDIR.tests/pcre-flags}/mode${n} 
${TEST_OUTDIR.tests/pcre-flags}/got${n}.fsm: ${TEST_SRCDIR.tests/pcre-flags}/in${n}.re - ${RE} -F "${TEST_OUTDIR.tests/pcre-flags/mode${n}}" -b -r pcre -py ${.ALLSRC:M*.re} \ + ${RE} -FC -F "${TEST_OUTDIR.tests/pcre-flags/mode${n}}" -b -r pcre -py ${.ALLSRC:M*.re} \ > $@ ${TEST_OUTDIR.tests/pcre-flags}/nfa${n}.fsm: ${TEST_SRCDIR.tests/pcre-flags}/in${n}.re - ${RE} -F "${TEST_OUTDIR.tests/pcre-flags/mode${n}}" -b -r pcre -n -py ${.ALLSRC:M*.re} \ + ${RE} -FC -F "${TEST_OUTDIR.tests/pcre-flags/mode${n}}" -b -r pcre -n -py ${.ALLSRC:M*.re} \ > $@ .else ${TEST_OUTDIR.tests/pcre-flags}/got${n}.fsm: ${TEST_SRCDIR.tests/pcre-flags}/in${n}.re - ${RE} -b -r pcre -py ${.ALLSRC:M*.re} \ + ${RE} -FC -b -r pcre -py ${.ALLSRC:M*.re} \ > $@ ${TEST_OUTDIR.tests/pcre-flags}/nfa${n}.fsm: ${TEST_SRCDIR.tests/pcre-flags}/in${n}.re diff --git a/tests/pcre-repeat/Makefile b/tests/pcre-repeat/Makefile index c325d2f8e..97535b2c3 100755 --- a/tests/pcre-repeat/Makefile +++ b/tests/pcre-repeat/Makefile @@ -12,11 +12,11 @@ RE=${BUILD}/bin/re .for n in ${TEST.tests/pcre-repeat:T:Mout*.fsm:R:C/^out//} ${TEST_OUTDIR.tests/pcre-repeat}/got${n}.fsm: ${TEST_SRCDIR.tests/pcre-repeat}/in${n}.re - ${RE} -r pcre -py ${.ALLSRC:M*.re} \ + ${RE} -FC -r pcre -py ${.ALLSRC:M*.re} \ > $@ ${TEST_OUTDIR.tests/pcre-repeat}/nfa${n}.fsm: ${TEST_SRCDIR.tests/pcre-repeat}/in${n}.re - ${RE} -r pcre -n -py ${.ALLSRC:M*.re} \ + ${RE} -FC -r pcre -n -py ${.ALLSRC:M*.re} \ > $@ ${TEST_OUTDIR.tests/pcre-repeat}/res${n}: \ @@ -30,7 +30,7 @@ FSMTEST_RESULT += ${TEST_OUTDIR.tests/pcre-repeat}/res${n} .for n in ${TEST.tests/pcre-repeat:T:Mout*.err:R:C/^out//} ${TEST_OUTDIR.tests/pcre-repeat}/got${n}.err: ${TEST_SRCDIR.tests/pcre-repeat}/in${n}.re - ${RE} -r pcre -py ${.ALLSRC:M*.re} \ + ${RE} -FC -r pcre -py ${.ALLSRC:M*.re} \ 2> $@; [ $$? 
-ne 0 ] ${TEST_OUTDIR.tests/pcre-repeat}/res${n}: \ diff --git a/tests/pcre/Makefile b/tests/pcre/Makefile index 23f879a04..239d2c93b 100755 --- a/tests/pcre/Makefile +++ b/tests/pcre/Makefile @@ -21,7 +21,7 @@ PCREGREP ?= pcregrep # bit of a hack. # 2) removes any trailing \n at the end of the input ${TEST_OUTDIR.tests/pcre-pcregrep}/in${n}.txt: ${TEST_SRCDIR.tests/pcre}/in${n}.re - ${RE} -mr pcre -y ${.ALLSRC:M*.re} \ + ${RE} -FC -mr pcre -y ${.ALLSRC:M*.re} \ | perl -0pe 's/\\x([0-9a-zA-z]{2})/chr(hex($$1))/ge;' -e 's/\n\Z//' \ > $@ @@ -41,16 +41,16 @@ test:: ${TEST_OUTDIR.tests/pcre-pcregrep}/res${n} .if exists(${TEST_SRCDIR.tests/pcre}/mode${n}) TEST_OUTDIR.tests/pcre/mode${n} != cat ${TEST_SRCDIR.tests/pcre}/mode$n ${TEST_OUTDIR.tests/pcre}/got${n}.fsm: ${TEST_SRCDIR.tests/pcre}/in${n}.re - ${RE} -F "${TEST_OUTDIR.tests/pcre/mode${n}}" -r pcre -py ${.ALLSRC:M*.re} \ + ${RE} -FC -F "${TEST_OUTDIR.tests/pcre/mode${n}}" -r pcre -py ${.ALLSRC:M*.re} \ > $@ .else ${TEST_OUTDIR.tests/pcre}/got${n}.fsm: ${TEST_SRCDIR.tests/pcre}/in${n}.re - ${RE} -r pcre -py ${.ALLSRC:M*.re} \ + ${RE} -FC -r pcre -py ${.ALLSRC:M*.re} \ > $@ .endif ${TEST_OUTDIR.tests/pcre}/nfa${n}.fsm: ${TEST_SRCDIR.tests/pcre}/in${n}.re - ${RE} -r pcre -n -py ${.ALLSRC:M*.re} \ + ${RE} -FC -r pcre -n -py ${.ALLSRC:M*.re} \ > $@ ${TEST_OUTDIR.tests/pcre}/res${n}: \ @@ -64,7 +64,7 @@ FSMTEST_RESULT += ${TEST_OUTDIR.tests/pcre}/res${n} .for n in ${TEST.tests/pcre:T:Mout*.err:R:C/^out//} ${TEST_OUTDIR.tests/pcre}/got${n}.err: ${TEST_SRCDIR.tests/pcre}/in${n}.re - ${RE} -r pcre -py ${.ALLSRC:M*.re} \ + ${RE} -FC -r pcre -py ${.ALLSRC:M*.re} \ 2> $@; [ $$? 
-ne 0 ] ${TEST_OUTDIR.tests/pcre}/res${n}: \ diff --git a/tests/re_literal/Makefile b/tests/re_literal/Makefile index 1d1333491..9227a5699 100755 --- a/tests/re_literal/Makefile +++ b/tests/re_literal/Makefile @@ -9,7 +9,7 @@ RE=${BUILD}/bin/re .for n in ${TEST.tests/re_literal:T:Mout*.txt:R:C/^out//} ${TEST_OUTDIR.tests/re_literal}/got${n}.txt: ${TEST_SRCDIR.tests/re_literal}/in${n}.re - ( ${RE} -r pcre -t -y ${.ALLSRC:M*.re} || echo non-literal ) \ + ( ${RE} -FC -r pcre -t -y ${.ALLSRC:M*.re} || echo non-literal ) \ > $@ ${TEST_OUTDIR.tests/re_literal}/res${n}: \ diff --git a/theft/Makefile b/theft/Makefile index 0d38d8cfc..921c482a9 100644 --- a/theft/Makefile +++ b/theft/Makefile @@ -6,7 +6,6 @@ SRC += theft/util.c SRC += theft/wrap.c SRC += theft/fuzz_adt_edge_set.c -SRC += theft/fuzz_adt_ipriq.c SRC += theft/fuzz_adt_priq.c SRC += theft/fuzz_capture_string_set.c SRC += theft/fuzz_literals.c diff --git a/theft/fuzz_adt_ipriq.c b/theft/fuzz_adt_ipriq.c deleted file mode 100644 index 1847ef6ce..000000000 --- a/theft/fuzz_adt_ipriq.c +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright 2021 Scott Vokes - * - * See LICENCE for the full copyright terms. - */ - -#include "type_info_adt_ipriq.h" - -#include -#include - -struct model { - size_t used; - size_t entries[]; -}; - -static enum ipriq_cmp_res -cmp_size_t(size_t a, size_t b, void *opaque) -{ - (void)opaque; - return a < b ? IPRIQ_CMP_LT : - a > b ? 
IPRIQ_CMP_GT : IPRIQ_CMP_EQ; -} - -static int exec_add(size_t x, struct model *m, struct ipriq *pq) -{ - if (!ipriq_add(pq, x)) { - return 0; - } - - m->entries[m->used] = x; - m->used++; - return 1; -} - -static int find_min_pos(const struct model *m, size_t *pos) -{ - size_t i; - if (m->used == 0) { - return 0; - } - - size_t res, min; - res = 0; - min = m->entries[0]; - - for (i = 1; i < m->used; i++) { - if (m->entries[i] < min) { - res = i; - min = m->entries[i]; - } - } - *pos = res; - return 1; -} - -static int exec_peek(struct model *m, struct ipriq *pq) -{ - size_t res; - - if (!ipriq_peek(pq, &res)) { - return m->used == 0; - } - - size_t pos; - if (!find_min_pos(m, &pos)) { - assert(!"unreachable (peek)"); - } - - return res == m->entries[pos]; -} - -static int exec_pop(struct model *m, struct ipriq *pq) -{ - size_t res; - - if (!ipriq_pop(pq, &res)) { - return m->used == 0; - } - - size_t pos; - if (!find_min_pos(m, &pos)) { - assert(!"unreachable (pop)"); - } - - if (res != m->entries[pos]) { - return 0; - } - - assert(m->used > 0); - if (pos < m->used - 1) { - m->entries[pos] = m->entries[m->used - 1]; - } - m->used--; - return 1; -} - -static enum theft_trial_res -compare_against_model(const struct ipriq_scenario *scen) -{ - enum theft_trial_res res = THEFT_TRIAL_FAIL; - size_t i; - - struct model *m = malloc(sizeof(*m) - + scen->count * sizeof(m->entries[0])); - if (m == NULL) { - return THEFT_TRIAL_ERROR; - } - m->used = 0; - - struct ipriq *pq = ipriq_new(NULL, cmp_size_t, NULL); - if (pq == NULL) { - return THEFT_TRIAL_ERROR; - } - - for (i = 0; i < scen->count; i++) { - const struct ipriq_op *op = &scen->ops[i]; - - switch (op->t) { - case IPRIQ_OP_ADD: - if (!exec_add(op->u.add.x, m, pq)) { - goto cleanup; - } - break; - - case IPRIQ_OP_PEEK: - if (!exec_peek(m, pq)) { - goto cleanup; - } - break; - - case IPRIQ_OP_POP: - if (!exec_pop(m, pq)) { - goto cleanup; - } - break; - - default: - assert(false); break; - } - } - - res = 
THEFT_TRIAL_PASS; - -cleanup: - free(m); - - return res; -} - -static enum theft_trial_res -prop_ipriq_model(struct theft *t, void *arg1) -{ - const struct ipriq_scenario *scen = arg1; - (void)t; - return compare_against_model(scen); -} - -static bool -test_ipriq(theft_seed seed, uintptr_t limit) -{ - enum theft_run_res res; - - struct ipriq_hook_env env = { - .tag = 'I', - .limit = limit, - }; - - struct theft_run_config config = { - .name = __func__, - .prop1 = prop_ipriq_model, - .type_info = { &type_info_adt_ipriq }, - .trials = 1000, - .hooks = { - .trial_pre = theft_hook_first_fail_halt, - .env = &env, - }, - .fork = { - .enable = true, - }, - - .seed = seed, - }; - - (void)limit; - - res = theft_run(&config); - printf("%s: %s\n", __func__, theft_run_res_str(res)); - - return res == THEFT_RUN_PASS; -} - -void -register_test_adt_ipriq(void) -{ - reg_test1("adt_ipriq", test_ipriq, 10000); -} diff --git a/theft/fuzz_capture_string_set.c b/theft/fuzz_capture_string_set.c index 7326356c2..f225bb326 100644 --- a/theft/fuzz_capture_string_set.c +++ b/theft/fuzz_capture_string_set.c @@ -158,7 +158,7 @@ check_capstring_set(struct capture_env *env, return THEFT_TRIAL_ERROR; } - const size_t capture_count = fsm_countcaptures(dfa); + const size_t capture_count = fsm_capture_ceiling(dfa); if (verbosity > 2) { fprintf(stderr, "==== cs '%s'\n", cs->string); @@ -172,7 +172,7 @@ check_capstring_set(struct capture_env *env, assert(cp != NULL); fsm_copies[cs_i] = cp; - const size_t cp_capture_count = fsm_countcaptures(cp); + const size_t cp_capture_count = fsm_capture_ceiling(cp); if (verbosity > 2) { fprintf(stderr, "==== min(det(cp))\n"); fsm_print_fsm(stderr, cp); @@ -196,7 +196,7 @@ check_capstring_set(struct capture_env *env, return THEFT_TRIAL_FAIL; } - combined_capture_count = fsm_countcaptures(combined); + combined_capture_count = fsm_capture_ceiling(combined); for (size_t cs_i = 0; cs_i < css->count; cs_i++) { total_captures += capture_counts[cs_i]; } @@ -295,7 +295,7 
@@ check_fsms_for_single_input(struct check_env *env, struct fsm_capture *captures, assert(exec_res >= 0); if (exec_res == 1) { if (LOG_LEVEL > 0) { - const size_t combined_capture_count = fsm_countcaptures(env->combined); + const size_t combined_capture_count = fsm_capture_ceiling(env->combined); for (size_t i = 0; i < combined_capture_count; i++) { fprintf(stderr, "capture[%zu/%zu]: (%ld, %ld)\n", i, combined_capture_count, @@ -415,7 +415,7 @@ compare_captures(const struct check_env *env, const struct fsm_capture *captures_combined, size_t nth_fsm, const struct fsm_capture *captures) { - const size_t combined_capture_count = fsm_countcaptures(env->combined); + const size_t combined_capture_count = fsm_capture_ceiling(env->combined); if (combined_capture_count == 0) { return true; /* no captures */ } @@ -639,7 +639,7 @@ build_capstring_dfa(const struct capstring *cs, uint8_t end_id) goto cleanup; } - if (fsm_countcaptures(fsm) != cs->capture_count) { + if (fsm_capture_ceiling(fsm) != cs->capture_count) { goto cleanup; }