Skip to content

Commit

Permalink
fix: bug while matching regexps that contain long jumps.
Browse files Browse the repository at this point in the history
  • Loading branch information
plusvic committed Mar 10, 2024
1 parent ede8b9b commit 3da6fc8
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 19 deletions.
21 changes: 2 additions & 19 deletions lib/src/scanner/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -695,7 +695,6 @@ impl ScanContext<'_> {
}
| SubPattern::RegexpChainTail { chained_to, gap, flags, .. } => {
if self.within_valid_distance(
sub_pattern_id,
*chained_to,
match_.range.start,
gap,
Expand Down Expand Up @@ -729,32 +728,16 @@ impl ScanContext<'_> {

fn within_valid_distance(
&mut self,
sub_pattern_id: SubPatternId,
chained_to: SubPatternId,
match_start: usize,
gap: &RangeInclusive<u32>,
) -> bool {
// The lowest possible offset where the current sub-pattern can match
// is the offset of the first unconfirmed match, or the offset of the
// current match if no previous unconfirmed match exists.
let lowest_offset = self
.unconfirmed_matches
.get(&sub_pattern_id)
.and_then(|unconfirmed_matches| unconfirmed_matches.front())
.map_or(match_start, |first_match| first_match.range.start);

if let Some(unconfirmed_matches) =
self.unconfirmed_matches.get_mut(&chained_to)
{
let min_gap = *gap.start() as usize;
let max_gap = *gap.end() as usize;

// Retain the unconfirmed matches that can possibly match, but
// discard those that are so far away from the current match that
// there's no possibility for them to match.
unconfirmed_matches
.retain(|m| m.range.end + max_gap >= lowest_offset);

for m in unconfirmed_matches {
let valid_range =
m.range.end + min_gap..=m.range.end + max_gap;
Expand Down Expand Up @@ -797,9 +780,9 @@ impl ScanContext<'_> {
match &self.compiled_rules.get_sub_pattern(id).1 {
SubPattern::LiteralChainHead { flags, .. }
| SubPattern::RegexpChainHead { flags, .. } => {
// The chain head is reached and we know the range where
// The chain head is reached, and we know the range where
// the tail matches. This indicates that the whole chain is
// valid and we have a full match.
// valid, and we have a full match.
if let Some(tail_match_range) = &tail_match_range {
self.track_pattern_match(
pattern_id,
Expand Down
5 changes: 5 additions & 0 deletions lib/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1644,6 +1644,11 @@ fn hex_large_jumps() {
JUMPS_DATA.as_bytes()
);

pattern_true!(
"{ 61 61 61 61 [0-0x19c] 63 [0-0x13f] 64 64 64 64 }",
JUMPS_DATA.as_bytes()
);

rule_true!(
r#"rule test {
strings:
Expand Down

0 comments on commit 3da6fc8

Please sign in to comment.