From 4978de4e90d9a65f173fa6bc69302ab7f5ac3623 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 27 Feb 2024 15:59:53 +0100 Subject: [PATCH 1/3] chore: provide more details about error in `get_regexp` --- lib/src/compiler/rules.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/src/compiler/rules.rs b/lib/src/compiler/rules.rs index 9a5678ed9..174c50f36 100644 --- a/lib/src/compiler/rules.rs +++ b/lib/src/compiler/rules.rs @@ -206,7 +206,7 @@ impl Rules { self.rules.get(rule_id.0 as usize).unwrap() } - /// Returns an slice with the individual rules that were compiled. + /// Returns a slice with the individual rules that were compiled. #[inline] pub(crate) fn rules(&self) -> &[RuleInfo] { self.rules.as_slice() @@ -249,7 +249,11 @@ impl Rules { let hir = translator.translate(re.naked(), &ast).unwrap(); - regex_automata::meta::Builder::new().build_from_hir(&hir).unwrap() + regex_automata::meta::Builder::new() + .build_from_hir(&hir) + .unwrap_or_else(|err| { + panic!("error compiling regex `{}`: {}", re.as_str(), err) + }) } /// Returns a sub-pattern by [`SubPatternId`]. From 80221a0c3d64ab051f11a3aaeed8b3bc00e340a0 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 27 Feb 2024 16:16:10 +0100 Subject: [PATCH 2/3] chore: better formatting in error message --- lib/src/compiler/rules.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/src/compiler/rules.rs b/lib/src/compiler/rules.rs index 174c50f36..123655326 100644 --- a/lib/src/compiler/rules.rs +++ b/lib/src/compiler/rules.rs @@ -233,8 +233,8 @@ impl Rules { let re = types::Regexp::new(self.regexp_pool.get(regexp_id).unwrap()); let mut parser = regex_syntax::ast::parse::ParserBuilder::new() - // This the custom configuration option that turns-on support for - // the `{,n}`. This option doesn't exist in the official + // This is the custom configuration option that turns-on support + // for the `{,n}` syntax. 
This option doesn't exist in the official // `regex_syntax` crate. .empty_min_range(true) .build(); @@ -250,9 +250,10 @@ impl Rules { let hir = translator.translate(re.naked(), &ast).unwrap(); regex_automata::meta::Builder::new() + .configure(Default::default()) .build_from_hir(&hir) .unwrap_or_else(|err| { - panic!("error compiling regex `{}`: {}", re.as_str(), err) + panic!("error compiling regex `{}`: {:#?}", re.as_str(), err) }) } From 530d0dcdf47f5132908ebacfa6e02a63cfdecc44 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 27 Feb 2024 16:35:50 +0100 Subject: [PATCH 3/3] chore: set a larger limit for the NFA while compiling regular expressions --- lib/src/compiler/rules.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/src/compiler/rules.rs b/lib/src/compiler/rules.rs index 123655326..61336b391 100644 --- a/lib/src/compiler/rules.rs +++ b/lib/src/compiler/rules.rs @@ -249,8 +249,14 @@ impl Rules { let hir = translator.translate(re.naked(), &ast).unwrap(); + // Raise the size limit for the NFA. The default limit (10MB) is + // too small for certain regexps seen in YARA rules in the wild, see: + // https://github.com/VirusTotal/yara-x/issues/85 + let config = regex_automata::meta::Config::new() + .nfa_size_limit(Some(50 * 1024 * 1024)); + regex_automata::meta::Builder::new() - .configure(Default::default()) + .configure(config) .build_from_hir(&hir) .unwrap_or_else(|err| { panic!("error compiling regex `{}`: {:#?}", re.as_str(), err)