Skip to content

Commit

Permalink
chore: upgrade regex-syntax and regex-automata crates.
Browse files Browse the repository at this point in the history
This solves an issue that prevented using Unicode characters in regular expressions.
See: rust-lang/regex@04f5d7b
  • Loading branch information
plusvic committed Mar 15, 2024
1 parent a7190fe commit 03a52a2
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 33 deletions.
32 changes: 16 additions & 16 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ protobuf-codegen = { git = "https://github.com/plusvic/rust-protobuf.git", rev =
protobuf-json-mapping = { git = "https://github.com/plusvic/rust-protobuf.git", rev = "b484d8a7" }
protobuf-parse = { git = "https://github.com/plusvic/rust-protobuf.git", rev = "b484d8a7" }
protobuf-support = { git = "https://github.com/plusvic/rust-protobuf.git", rev = "b484d8a7" }
regex-syntax = { git = "https://github.com/plusvic/regex.git", rev = "423493d" }
regex-automata = { git = "https://github.com/plusvic/regex.git", rev = "423493d" }
regex-syntax = { git = "https://github.com/plusvic/regex.git", rev = "50a708b" }
regex-automata = { git = "https://github.com/plusvic/regex.git", rev = "50a708b" }
roxmltree = "0.19.0"
rustc-hash = "1.1.0"
smallvec = "1.10.0"
Expand Down
15 changes: 7 additions & 8 deletions lib/src/re/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ impl Parser {
/// If true, allows regular expressions that mix greedy and non-greedy
/// quantifiers (e.g: `/ab.*cd.*?ef/`). When mixed greediness is not allowed
/// [`Parser::parse`] returns an error if the regular expression contains
/// both greedy and non-greedy quantifiers. By default mixed greediness is
/// both greedy and non-greedy quantifiers. By default, mixed greediness is
/// allowed.
pub fn allow_mixed_greediness(mut self, yes: bool) -> Self {
self.allow_mixed_greediness = yes;
Expand Down Expand Up @@ -89,13 +89,12 @@ impl Parser {
regexp.case_insensitive
};

let mut translator =
regex_syntax::hir::translate::TranslatorBuilder::new()
.case_insensitive(case_insensitive)
.dot_matches_new_line(regexp.dot_matches_new_line)
.unicode(false)
.utf8(false)
.build();
let mut translator = re::hir::translate::TranslatorBuilder::new()
.case_insensitive(case_insensitive)
.dot_matches_new_line(regexp.dot_matches_new_line)
.unicode(false)
.utf8(false)
.build();

let hir = translator.translate(regexp.src, &ast).map_err(|err| {
Error::SyntaxError {
Expand Down
9 changes: 2 additions & 7 deletions lib/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,8 @@ fn string_operations() {
condition_true!(r#""foo\nbar" matches /foo.*bar/s"#);
condition_false!(r#""foo\nbar" matches /foo.*bar/"#);
condition_true!(r#""foobar" matches /fo{,2}bar/"#);
condition_true!(r#""タイトル" matches /タイトル/"#);
condition_true!(r#""🙈🙉🙊" matches /.../"#);
}

#[test]
Expand Down Expand Up @@ -1309,17 +1311,10 @@ fn regexp_patterns_4() {

pattern_match!(r"/foo\x01bar/", b"foo\x01bar", b"foo\x01bar");

/*
TODO: YARA accepts Unicode characters in regexps, but regex_syntax either
accepts Unicode characters or escape sequences like \x01, but not both
at the same time. Presumably this is because with escape sequences you can't
create non-valid Unicode code points, so when you enable Unicode it disables
escape sequences.
pattern_true!(
r#"/🙈🙉🙊/i"#,
b"\xF0\x9F\x99\x88\xF0\x9F\x99\x89\xF0\x9F\x99\x8A"
);
*/
}

#[test]
Expand Down

0 comments on commit 03a52a2

Please sign in to comment.