Skip to content

Commit

Permalink
refactor: remove the LiteralAnchored sub-pattern.
Browse files Browse the repository at this point in the history
  • Loading branch information
plusvic committed Sep 7, 2023
1 parent e9acea6 commit 93252ee
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 38 deletions.
18 changes: 15 additions & 3 deletions yara-x/src/compiler/ir/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ impl<'src> Pattern<'src> {
/// in order to indicate that the pattern (the `$a` pattern in this case)
/// can match only at a fixed offset.
pub fn anchor_at(&mut self, offset: usize) {
if let Pattern::Literal(p) = self {
match p.anchored_at {
match self {
Pattern::Literal(p) => match p.anchored_at {
Some(o) if o != offset => {
p.anchored_at = None;
p.flags.set(PatternFlags::NonAnchorable);
Expand All @@ -120,7 +120,19 @@ impl<'src> Pattern<'src> {
}
}
_ => {}
}
},
Pattern::Regexp(p) => match p.anchored_at {
Some(o) if o != offset => {
p.anchored_at = None;
p.flags.set(PatternFlags::NonAnchorable);
}
None => {
if !p.flags.contains(PatternFlags::NonAnchorable) {
p.anchored_at = Some(offset);
}
}
_ => {}
},
}
}

Expand Down
45 changes: 21 additions & 24 deletions yara-x/src/compiler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -738,6 +738,7 @@ impl<'a> Compiler<'a> {
SubPattern::Literal {
pattern: pattern_lit_id,
flags: flags | SubPatternFlags::Nocase,
anchored_at: None,
},
best_atom.case_combinations(),
SubPatternAtom::from_atom,
Expand Down Expand Up @@ -827,14 +828,10 @@ impl<'a> Compiler<'a> {
}
} else {
self.add_sub_pattern(
if let Some(offset) = pattern.anchored_at {
SubPattern::LiteralAnchored {
pattern: pattern_lit_id,
anchored_at: offset,
flags,
}
} else {
SubPattern::Literal { pattern: pattern_lit_id, flags }
SubPattern::Literal {
pattern: pattern_lit_id,
anchored_at: pattern.anchored_at,
flags,
},
iter::once(best_atom),
SubPatternAtom::from_atom,
Expand Down Expand Up @@ -868,7 +865,11 @@ impl<'a> Compiler<'a> {
// /foo|bar|baz/
// { 01 02 03 }
// { (01 02 03 | 04 05 06 ) }
self.process_alternation_literal(head, pattern.flags);
self.process_alternation_literal(
head,
pattern.anchored_at,
pattern.flags,
);
return Ok(());
}

Expand Down Expand Up @@ -918,6 +919,7 @@ impl<'a> Compiler<'a> {
fn process_alternation_literal(
&mut self,
hir: re::hir::Hir,
anchored_at: Option<usize>,
flags: PatternFlagSet,
) {
let ascii = flags.contains(PatternFlags::Ascii);
Expand All @@ -944,24 +946,24 @@ impl<'a> Compiler<'a> {
self.lit_pool.get_bytes(pattern_lit_id).unwrap(),
);

let sp = SubPattern::Literal {
let flags =
if wide { flags | SubPatternFlags::Wide } else { flags };

let sub_pattern = SubPattern::Literal {
pattern: pattern_lit_id,
flags: if wide {
flags | SubPatternFlags::Wide
} else {
flags
},
anchored_at,
flags,
};

if case_insensitive {
self.add_sub_pattern(
sp,
sub_pattern,
best_atom.case_combinations(),
SubPatternAtom::from_atom,
);
} else {
self.add_sub_pattern(
sp,
sub_pattern,
iter::once(best_atom),
SubPatternAtom::from_atom,
);
Expand Down Expand Up @@ -1386,7 +1388,7 @@ impl<'a> Compiler<'a> {
// the Aho-Corasick automata. Instead their IDs are added to the
// `anchored_sub_patterns` list; the offset each one is anchored to is kept
// in the sub-pattern's `anchored_at` field.
if let SubPattern::LiteralAnchored { .. } = sub_pattern {
if let SubPattern::Literal { anchored_at: Some(_), .. } = sub_pattern {
self.anchored_sub_patterns.push(sub_pattern_id);
} else {
for atom in atoms {
Expand Down Expand Up @@ -1657,12 +1659,7 @@ bitmask! {
pub(crate) enum SubPattern {
Literal {
pattern: LiteralId,
flags: SubPatternFlagSet,
},

LiteralAnchored {
pattern: LiteralId,
anchored_at: usize,
anchored_at: Option<usize>,
flags: SubPatternFlagSet,
},

Expand Down
2 changes: 1 addition & 1 deletion yara-x/src/compiler/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ impl Rules {
info!("Number of rules: {}", self.rules.len());
info!("Number of patterns: {}", self.num_patterns);
info!(
"Number of patterns anchored sub-patterns: {}",
"Number of anchored sub-patterns: {}",
self.anchored_sub_patterns.len()
);
info!("Number of atoms: {}", self.atoms.len());
Expand Down
13 changes: 3 additions & 10 deletions yara-x/src/scanner/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -388,12 +388,6 @@ impl ScanContext<'_> {
let verification_start = Instant::now();

match sub_pattern {
// Anchored patterns should not be found by the Aho-Corasick
// automata because they are not added to the automata in
// the first place.
SubPattern::LiteralAnchored { .. } => unreachable!(
"anchored pattern found by the Aho-Corasick automata"
),
SubPattern::Literal { pattern, flags, .. }
| SubPattern::LiteralChainHead { pattern, flags, .. }
| SubPattern::LiteralChainTail { pattern, flags, .. } => {
Expand Down Expand Up @@ -558,10 +552,10 @@ impl ScanContext<'_> {
.map(|id| (id, self.compiled_rules.get_sub_pattern(*id)))
{
match sub_pattern {
SubPattern::LiteralAnchored {
SubPattern::Literal {
pattern,
flags,
anchored_at,
anchored_at: Some(offset),
..
} => {
if let Some(match_) = verify_literal_match(
Expand All @@ -570,7 +564,7 @@ impl ScanContext<'_> {
.get_bytes(*pattern)
.unwrap(),
self.scanned_data(),
*anchored_at,
*offset,
*flags,
) {
self.handle_sub_pattern_match(
Expand All @@ -595,7 +589,6 @@ impl ScanContext<'_> {
) {
match sub_pattern {
SubPattern::Literal { .. }
| SubPattern::LiteralAnchored { .. }
| SubPattern::Xor { .. }
| SubPattern::Base64 { .. }
| SubPattern::Base64Wide { .. }
Expand Down

0 comments on commit 93252ee

Please sign in to comment.