diff --git a/Cargo.lock b/Cargo.lock index e810e487b..2a654efab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -221,7 +221,7 @@ dependencies = [ "peeking_take_while", "proc-macro2", "quote", - "regex", + "regex 1.9.5 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-hash", "shlex", "syn 1.0.109", @@ -304,7 +304,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05" dependencies = [ "memchr", - "regex-automata", + "regex-automata 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", "serde", ] @@ -687,7 +687,7 @@ dependencies = [ "oorandom", "plotters", "rayon", - "regex", + "regex 1.9.5 (registry+https://github.com/rust-lang/crates.io-index)", "serde", "serde_derive", "serde_json", @@ -991,7 +991,7 @@ dependencies = [ "humantime", "is-terminal", "log", - "regex", + "regex 1.9.5 (registry+https://github.com/rust-lang/crates.io-index)", "termcolor", ] @@ -1247,7 +1247,7 @@ dependencies = [ "bstr", "fnv", "log", - "regex", + "regex 1.9.5 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1353,7 +1353,7 @@ dependencies = [ "lazy_static", "log", "memchr", - "regex", + "regex 1.9.5 (registry+https://github.com/rust-lang/crates.io-index)", "same-file", "thread_local", "walkdir", @@ -1586,7 +1586,7 @@ dependencies = [ "maplit", "once_cell", "rayon", - "regex", + "regex 1.9.5 (registry+https://github.com/rust-lang/crates.io-index)", "serde", "serde-wasm-bindgen", "serde_json", @@ -1708,9 +1708,9 @@ checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" [[package]] name = "memchr" -version = "2.5.0" +version = "2.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" [[package]] name = "memfd" @@ -2038,7 +2038,7 @@ checksum = "3b25af4ef94a8528b41fb49a696e361dc6ef975c782417268072d987ac327964" dependencies = [ "once_cell", "parse-display-derive", - "regex", + "regex 1.9.5 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2050,7 +2050,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "regex", + "regex 1.9.5 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.6.29", "structmeta", "syn 1.0.109", @@ -2295,7 +2295,7 @@ dependencies = [ "once_cell", "protobuf", "protobuf-parse", - "regex", + "regex 1.9.5 (registry+https://github.com/rust-lang/crates.io-index)", "tempfile", "thiserror", ] @@ -2546,25 +2546,46 @@ dependencies = [ [[package]] name = "regex" -version = "1.9.3" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a" +checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex" +version = "1.9.5" +source = "git+https://github.com/plusvic/regex.git?rev=423493d#423493d4094e8167f50e1d812eb5a6349440c984" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax 0.7.4", + "regex-automata 0.3.8 (git+https://github.com/plusvic/regex.git?rev=423493d)", + "regex-syntax 0.7.5 (git+https://github.com/plusvic/regex.git?rev=423493d)", ] [[package]] name = "regex-automata" -version = "0.3.6" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed1ceff11a1dddaee50c9dc8e4938bd106e9d89ae372f192311e7da498e3b69" +checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.7.4", + "regex-syntax 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-automata" +version = "0.3.8" +source = "git+https://github.com/plusvic/regex.git?rev=423493d#423493d4094e8167f50e1d812eb5a6349440c984" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.7.5 (git+https://github.com/plusvic/regex.git?rev=423493d)", ] [[package]] @@ -2575,9 +2596,14 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" + +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "git+https://github.com/plusvic/regex.git?rev=423493d#423493d4094e8167f50e1d812eb5a6349440c984" [[package]] name = "rgb" @@ -3057,7 +3083,7 @@ dependencies = [ "pest", "pest_derive", "phf 0.10.1", - "regex", + "regex 1.9.5 (registry+https://github.com/rust-lang/crates.io-index)", "semver 0.11.0", "sha2 0.9.9", "signal-hook 0.1.17", @@ -3999,8 +4025,8 @@ dependencies = [ "protobuf", "protobuf-codegen", "protobuf-parse", - "regex", - "regex-syntax 0.7.4", + "regex 1.9.5 (git+https://github.com/plusvic/regex.git?rev=423493d)", + "regex-syntax 0.7.5 (git+https://github.com/plusvic/regex.git?rev=423493d)", "rustc-hash", "serde", "smallvec", diff --git a/Cargo.toml b/Cargo.toml index 184e0422c..3cfb96205 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,7 +46,7 @@ lazy_static = "1.4.0" line-span = "0.1.3" linkme = "0.3" log = "0.4" -memchr = "2.5.0" +memchr = "2.6.3" memx = "0.1.28" num = "0.4.0" pest = "2.5.5" @@ -55,8 +55,8 @@ pretty_assertions = "1.3.0" protobuf = "3.2.0" protobuf-codegen = "3.2.0" protobuf-parse = "3.2.0" -regex = "1.9.1" -regex-syntax = "0.7.4" +regex = { git = "https://github.com/plusvic/regex.git", rev="423493d" } +regex-syntax = { git = "https://github.com/plusvic/regex.git", rev="423493d" } rustc-hash = "1.1.0" smallvec = "1.10.0" serde = "1.0.156" diff --git a/yara-x/src/compiler/rules.rs b/yara-x/src/compiler/rules.rs index 860338457..c82ebb51e 100644 --- a/yara-x/src/compiler/rules.rs +++ b/yara-x/src/compiler/rules.rs @@ -207,6 +207,7 @@ impl Rules { /// If no regular expression with such [`RegexpId`] exists. #[inline] pub(crate) fn get_regexp(&self, regexp_id: RegexpId) -> Regex { + // TODO: when compiling regexps allow the use of `{,n}` syntax let re = Regexp::new(self.regexp_pool.get(regexp_id).unwrap()); RegexBuilder::new(re.naked()) .case_insensitive(re.case_insensitive()) diff --git a/yara-x/src/re/parser.rs b/yara-x/src/re/parser.rs index a0b5478cb..78e2d331a 100644 --- a/yara-x/src/re/parser.rs +++ b/yara-x/src/re/parser.rs @@ -61,12 +61,14 @@ impl Parser { /// Parses the regexp and returns its HIR. pub fn parse(&self, regexp: &ast::Regexp) -> Result { - let ast = re::ast::parse::Parser::new().parse(regexp.src).map_err( - |err| Error::SyntaxError { + let mut parser = + re::ast::parse::ParserBuilder::new().empty_min_range(true).build(); + + let ast = + parser.parse(regexp.src).map_err(|err| Error::SyntaxError { msg: err.kind().to_string(), span: *err.span(), - }, - )?; + })?; let greedy = Validator::new().validate(&ast); diff --git a/yara-x/src/tests/mod.rs b/yara-x/src/tests/mod.rs index 204b26737..e9c95e9e2 100644 --- a/yara-x/src/tests/mod.rs +++ b/yara-x/src/tests/mod.rs @@ -916,6 +916,7 @@ fn regexp_patterns_2() { pattern_match!(r#"/ab{2,3}c/"#, b"abbbc", b"abbbc"); pattern_match!(r#"/ab{2,3}?c/"#, b"abbbc", b"abbbc"); pattern_match!(r#"/ab{0,1}?c/"#, b"abc", b"abc"); + pattern_match!(r#"/ab{,1}?c/"#, b"abc", b"abc"); pattern_match!(r#"/a{0,1}bc/"#, b"bbc", b"bc"); pattern_match!(r#"/ab{0,}c/"#, b"ac", b"ac"); pattern_match!(r#"/ab{0,}c/"#, b"abc", b"abc"); @@ -935,7 +936,9 @@ fn regexp_patterns_2() { pattern_false!(r#"/ab{1,}b/"#, b"ab"); pattern_match!(r#"/ab{1,1}c/"#, b"abc", b"abc"); pattern_match!(r#"/ab{0,3}c/"#, b"abbbc", b"abbbc"); + pattern_match!(r#"/ab{,3}c/"#, b"abbbc", b"abbbc"); pattern_false!(r#"/ab{0,2}c/"#, b"abbbc"); + pattern_false!(r#"/ab{,2}c/"#, b"abbbc"); pattern_false!(r#"/ab{4,5}c/"#, b"abbbc"); pattern_false!(r#"/ab{3}c/"#, b"abbbbc"); pattern_false!(r#"/ab{4}c/"#, b"abbbbbc");