Skip to content

Commit

Permalink
fix: bug with consecutive repeats.
Browse files: browse the repository at this point in the history
  • Loading branch information
plusvic committed Oct 7, 2024
1 parent 3d35671 commit 85382b4
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 16 deletions.
20 changes: 13 additions & 7 deletions lib/src/re/thompson/pikevm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ pub(crate) fn epsilon_closure(

let is_word_char = |c: u8| c == b'_' || c.is_ascii_alphanumeric();

while let Some(mut ts) = state.threads.pop() {
while let Some(ts) = state.threads.pop() {
let (instr, instr_size) =
InstrParser::decode_instr(unsafe { code.get_unchecked(ts.ip..) });
match instr {
Expand Down Expand Up @@ -382,12 +382,15 @@ pub(crate) fn epsilon_closure(
state.threads.push(ts.ip_offset(instr_size.into()));
}
Instr::RepeatGreedyEnd { offset, min, max } => {
ts.rep_count += 1;
if ts.rep_count >= min {
state.threads.push(ts.ip_offset(instr_size.into()));
let mut ts = ts.ip_offset(instr_size.into());
ts.rep_count = 0;
state.threads.push(ts);
}
if ts.rep_count < max {
state.threads.push(ts.ip_offset(offset));
let mut ts = ts.ip_offset(offset);
ts.rep_count += 1;
state.threads.push(ts);
}
}
Instr::RepeatNonGreedyStart { offset, min } => {
Expand All @@ -397,12 +400,15 @@ pub(crate) fn epsilon_closure(
}
}
Instr::RepeatNonGreedyEnd { offset, min, max } => {
ts.rep_count += 1;
if ts.rep_count < max {
state.threads.push(ts.ip_offset(offset));
let mut ts = ts.ip_offset(offset);
ts.rep_count += 1;
state.threads.push(ts);
}
if ts.rep_count >= min {
state.threads.push(ts.ip_offset(instr_size.into()));
let mut ts = ts.ip_offset(instr_size.into());
ts.rep_count = 0;
state.threads.push(ts);
}
}
Instr::Jump(offset) => {
Expand Down
23 changes: 14 additions & 9 deletions lib/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1254,7 +1254,7 @@ fn regexp_patterns_2() {
}

#[test]
fn regexp_patterns_2a() {
fn regexp_patterns_3() {
pattern_match!(r#"/.b{15}/"#, b"abbbbbbbbbbbbbbb", b"abbbbbbbbbbbbbbb");
pattern_match!(
r#"/.b{15,16}/"#,
Expand Down Expand Up @@ -1286,6 +1286,16 @@ fn regexp_patterns_2a() {
b"aabbbbbbbbbbbbbbbcccc",
b"aabbbbbbbbbbbbbbbcccc"
);
pattern_match!(
r#"/abcd.{0,11}efgh.{0,11}ijk/"#,
b"abcd123456789ABefgh123456789ABijk",
b"abcd123456789ABefgh123456789ABijk"
);
pattern_match!(
r#"/abcd.{0,11}?efgh.{0,11}?ijk/"#,
b"abcd123456789ABefgh123456789ABijk",
b"abcd123456789ABefgh123456789ABijk"
);
pattern_match!(r#"/ab{2,15}c/"#, b"abbbc", b"abbbc");
pattern_match!(r#"/ab{2,15}?c/"#, b"abbbc", b"abbbc");
pattern_match!(r#"/ab{0,15}?c/"#, b"abc", b"abc");
Expand Down Expand Up @@ -1348,7 +1358,7 @@ fn regexp_patterns_2a() {
}

#[test]
fn regexp_patterns_3() {
fn regexp_patterns_4() {
pattern_match!(r#"/a[bx]c/"#, b"abc", b"abc");
pattern_match!(r#"/a[bx]c/"#, b"axc", b"axc");
pattern_match!(r#"/a[0-9]*b/"#, b"ab", b"ab");
Expand Down Expand Up @@ -1469,7 +1479,7 @@ fn regexp_patterns_3() {
}

#[test]
fn regexp_patterns_4() {
fn regexp_patterns_5() {
pattern_match!(r"/\\/", b"\\", b"\\");
pattern_match!(r"/\babc/", b"abc", b"abc");
pattern_match!(r"/abc\b/", b"abc", b"abc");
Expand Down Expand Up @@ -1539,7 +1549,7 @@ fn regexp_patterns_4() {
}

#[test]
fn regexp_patterns_5() {
fn regexp_patterns_6() {
rule_true!(
r#"rule test {
strings:
Expand Down Expand Up @@ -3428,8 +3438,3 @@ fn test_defined_3() {
condition_true!(r#"not test_proto3.bool_undef"#);
condition_true!(r#"test_proto3.string_undef == """#);
}

// Regression case: a fixed-count repeat (`{2}`) over a group that ends in a
// wildcard, placed right after another wildcard. The pattern is expected to
// cover the whole 8-byte input.
// NOTE(review): presumably `pattern_match!` takes (pattern, data, expected
// match) — confirm against the macro definition.
#[test]
fn issue() {
    pattern_match!(
        r#"/a.(bc.){2}/"#,
        b"aabcabca",
        b"aabcabca"
    );
}

0 comments on commit 85382b4

Please sign in to comment.