From 2d6f395af155a077c006c972f2061768763dea2c Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Wed, 10 Jul 2024 11:13:19 +0200 Subject: [PATCH] feat: implement `InternalParser::not` --- parser-ng/src/parser/mod.rs | 185 ++++++++++++------ .../parser/tests/testdata/basic-error-1.out | 2 +- .../src/parser/tests/testdata/for-error-1.out | 2 +- .../tests/testdata/hex-patterns-error-1.out | 4 +- .../tests/testdata/hex-patterns-error-2.out | 4 +- .../parser/tests/testdata/meta-error-1.out | 2 +- .../parser/tests/testdata/meta-error-2.out | 2 +- .../parser/tests/testdata/meta-error-3.out | 2 +- .../parser/tests/testdata/meta-error-4.out | 2 +- .../parser/tests/testdata/meta-error-5.out | 2 +- .../parser/tests/testdata/meta-error-6.out | 4 +- .../src/parser/tests/testdata/of-error-1.out | 52 ++--- .../tests/testdata/pattern-mods-error-1.out | 4 +- .../tests/testdata/patterns-error-1.out | 2 +- .../tests/testdata/patterns-error-2.out | 2 +- .../tests/testdata/patterns-error-3.out | 2 +- .../tests/testdata/rule-tags-error-1.out | 2 +- .../tests/testdata/rule-tags-error-2.out | 2 +- .../tests/testdata/rule-tags-error-3.out | 2 +- 19 files changed, 168 insertions(+), 111 deletions(-) diff --git a/parser-ng/src/parser/mod.rs b/parser-ng/src/parser/mod.rs index a0518ef32..4ff0e4fcb 100644 --- a/parser-ng/src/parser/mod.rs +++ b/parser-ng/src/parser/mod.rs @@ -15,7 +15,6 @@ error nodes is valid YARA code. [2]: https://github.com/rust-analyzer/rowan */ -use indexmap::map::Entry; use indexmap::{IndexMap, IndexSet}; use rustc_hash::{FxHashMap, FxHashSet}; @@ -81,6 +80,9 @@ struct InternalParser<'src> { /// and the "zero or more" operation (examples: `(A|B)`, `A*`). opt_depth: usize, + /// How deep is the parse into "not" branches of the grammar. + not_depth: usize, + /// Hash map where keys are spans within the source code, and values /// are a list of tokens that were expected to match at that span. 
/// @@ -118,6 +120,12 @@ struct InternalParser<'src> { /// position and produces a comprehensive error message. expected_token_errors: FxHashMap<Span, IndexSet<&'static str>>, + /// Similar to `expected_token_errors` but tracks the positions where + /// unexpected tokens were found. This type of error is produced when + /// [`InternalParser::not`] is used. This only stores the span where the + /// unexpected token was found. + unexpected_token_errors: FxHashSet<Span>, + /// Errors that are not yet sent to the `output` stream. The purpose of /// this map is removing duplicate messages for the same code span. In /// certain cases the parser can produce two different error messages for @@ -149,8 +157,10 @@ impl<'src> From> for InternalParser<'src> { output: SyntaxStream::new(), pending_errors: IndexMap::new(), expected_token_errors: FxHashMap::default(), + unexpected_token_errors: FxHashSet::default(), cache: FxHashSet::default(), opt_depth: 0, + not_depth: 0, failed: false, } } @@ -325,7 +335,15 @@ impl<'src> InternalParser<'src> { return self; } else { let span = token.span(); - self.unexpected_token_error(span, recovery_set, None); + self.expected_token_errors + .entry(span) + .or_default() + .extend( + recovery_set + .token_ids() + .map(|token| token.description()), + ); + self.handle_errors(); } } } @@ -470,16 +488,41 @@ impl<'src> InternalParser<'src> { let found_expected_token = match self.peek_non_ws() { None => None, Some(token) => { - let t = expected_tokens.contains(token); - if t.is_none() { - let span = token.span(); - self.unexpected_token_error( - span, - expected_tokens, - description, - ); + let span = token.span(); + let token = expected_tokens.contains(token); + + match (self.not_depth, token) { + // The expected token was found, but we are inside a "not". + // When we are inside a "not", any "expect" is negated, and + // actually means that the token was *not* expected.
+ (not_depth, Some(_)) if not_depth > 0 => { + self.unexpected_token_errors.insert(span); + self.handle_errors() + } + // We are not inside a "not", and the expected token was + // not found. + (0, None) => { + let tokens = self + .expected_token_errors + .entry(span.clone()) + .or_default(); + + if let Some(description) = description { + tokens.insert(description); + } else { + tokens.extend( + expected_tokens + .token_ids() + .map(|token| token.description()), + ); + } + + self.handle_errors(); + } + _ => {} } - t + + token } }; @@ -554,6 +597,32 @@ impl<'src> InternalParser<'src> { self } + /// Negates the result of `parser`. + /// + /// If `parser` is successful the parser transitions to failure state. + fn not
<P>
(&mut self, parser: P) -> &mut Self + where + P: Fn(&mut Self) -> &mut Self, + { + if self.failed { + return self; + } + + let bookmark = self.bookmark(); + + self.trivia(); + + self.not_depth += 1; + parser(self); + self.not_depth -= 1; + + self.failed = !self.failed; + + self.restore_bookmark(&bookmark); + self.remove_bookmark(bookmark); + self + } + /// Like [`InternalParser::expect`], but optional. fn opt_expect(&mut self, expected_tokens: &'static TokenSet) -> &mut Self { self.opt(|p| p.expect(expected_tokens)) @@ -706,54 +775,60 @@ impl<'src> InternalParser<'src> { } } - fn unexpected_token_error( - &mut self, - span: Span, - expected_tokens: &'static TokenSet, - description: Option<&'static str>, - ) { - let tokens = - self.expected_token_errors.entry(span.clone()).or_default(); + fn handle_errors(&mut self) { + if self.opt_depth > 0 { + return; + } - if let Some(description) = description { - tokens.insert(description); - } else { - tokens.extend( - expected_tokens.token_ids().map(|token| token.description()), - ); + // From all errors in expected_token_errors, use the one at the largest + // offset. If several errors start at the same offset, the last one is + // used. + let expected_token = self + .expected_token_errors + .drain() + .max_by_key(|(span, _)| span.start()); + + // From all errors in unexpected_token_errors, use the one at the + // largest offset. If several errors start at the same offset, the last + // one is used. + let unexpected_token = self + .unexpected_token_errors + .drain() + .max_by_key(|span| span.start()); + + let (span, expected) = match (expected_token, unexpected_token) { + (Some((e, _)), Some(u)) if u.start() > e.start() => (u, None), + (None, Some(u)) => (u, None), + (Some((e, expected)), _) => (e, Some(expected)), + _ => unreachable!(), + }; + + // There's a previous error for the same span, ignore this one. 
+ if self.pending_errors.contains_key(&span) { + return; } - if self.opt_depth == 0 { - // From all the unexpected token errors, use the one at the largest - // offset. If several errors start at the same offset, the last one - // is used. `self.expected_tokens` is left empty. - if let Some((span, tokens)) = self - .expected_token_errors - .drain() - .max_by_key(|(span, _)| span.start()) - { - match self.pending_errors.entry(span) { - Entry::Occupied(_) => { - // already present, don't replace. - } - Entry::Vacant(entry) => { - let (last, all_except_last) = - tokens.as_slice().split_last().unwrap(); + let actual_token = String::from_utf8_lossy( + self.tokens.source().get(span.range()).unwrap(), + ); - let error_msg = if all_except_last.is_empty() { - format!("expecting {last}") - } else { - format!( - "expecting {} or {last}", - itertools::join(all_except_last.iter(), ", "), - ) - }; + let error_msg = if let Some(expected) = expected { + let (last, all_except_last) = + expected.as_slice().split_last().unwrap(); - entry.insert(error_msg); - } - } + if all_except_last.is_empty() { + format!("expecting {last}, found `{actual_token}`") + } else { + format!( + "expecting {} or {last}, found `{actual_token}`", + itertools::join(all_except_last.iter(), ", "), + ) } - } + } else { + format!("unexpected `{actual_token}`") + }; + + self.pending_errors.insert(span, error_msg); } } @@ -1401,8 +1476,8 @@ impl<'src> InternalParser<'src> { }) }) .alt(|p| { - p.boolean_expr_tuple() //.not(|p| p.expect(t!(AT_KW | IN_KW))) - }) // TODO + p.boolean_expr_tuple().not(|p| p.expect(t!(AT_KW | IN_KW))) + }) .end_alt() .end() } diff --git a/parser-ng/src/parser/tests/testdata/basic-error-1.out b/parser-ng/src/parser/tests/testdata/basic-error-1.out index 62c4221fa..134b3f9cd 100644 --- a/parser-ng/src/parser/tests/testdata/basic-error-1.out +++ b/parser-ng/src/parser/tests/testdata/basic-error-1.out @@ -34,4 +34,4 @@ SOURCE_FILE@0..58 R_BRACE@57..58 "}" ERRORS: -- [11..12]: expecting 
`meta`, `strings` or `condition` +- [11..12]: expecting `meta`, `strings` or `condition`, found `{` diff --git a/parser-ng/src/parser/tests/testdata/for-error-1.out b/parser-ng/src/parser/tests/testdata/for-error-1.out index 1d26285dd..08196217b 100644 --- a/parser-ng/src/parser/tests/testdata/for-error-1.out +++ b/parser-ng/src/parser/tests/testdata/for-error-1.out @@ -59,4 +59,4 @@ SOURCE_FILE@0..94 R_BRACE@93..94 "}" ERRORS: -- [77..79]: expecting FLOAT, INTEGER, STRING, regexp, `filesize`, `entrypoint`, pattern count, pattern offset, pattern length, `-`, `~`, `(` or identifier +- [77..79]: expecting FLOAT, INTEGER, STRING, regexp, `filesize`, `entrypoint`, pattern count, pattern offset, pattern length, `-`, `~`, `(` or identifier, found `of` diff --git a/parser-ng/src/parser/tests/testdata/hex-patterns-error-1.out b/parser-ng/src/parser/tests/testdata/hex-patterns-error-1.out index e41f2cd03..0755f1b08 100644 --- a/parser-ng/src/parser/tests/testdata/hex-patterns-error-1.out +++ b/parser-ng/src/parser/tests/testdata/hex-patterns-error-1.out @@ -79,5 +79,5 @@ SOURCE_FILE@0..124 R_BRACE@123..124 "}" ERRORS: -- [40..41]: expecting BYTE or `(` -- [100..101]: expecting BYTE or `(` +- [40..41]: expecting BYTE or `(`, found `)` +- [100..101]: expecting BYTE or `(`, found `}` diff --git a/parser-ng/src/parser/tests/testdata/hex-patterns-error-2.out b/parser-ng/src/parser/tests/testdata/hex-patterns-error-2.out index ccd23f054..74466ab6d 100644 --- a/parser-ng/src/parser/tests/testdata/hex-patterns-error-2.out +++ b/parser-ng/src/parser/tests/testdata/hex-patterns-error-2.out @@ -73,5 +73,5 @@ SOURCE_FILE@0..118 R_BRACE@117..118 "}" ERRORS: -- [33..34]: expecting `[`, BYTE, `(` or `}` -- [93..94]: expecting `[`, BYTE, `(` or `}` +- [33..34]: expecting `[`, BYTE, `(` or `}`, found `0` +- [93..94]: expecting `[`, BYTE, `(` or `}`, found `a` diff --git a/parser-ng/src/parser/tests/testdata/meta-error-1.out b/parser-ng/src/parser/tests/testdata/meta-error-1.out index 
ecb74f652..3345fc3e7 100644 --- a/parser-ng/src/parser/tests/testdata/meta-error-1.out +++ b/parser-ng/src/parser/tests/testdata/meta-error-1.out @@ -23,4 +23,4 @@ SOURCE_FILE@0..40 R_BRACE@39..40 "}" ERRORS: -- [14..17]: expecting `meta`, `strings` or `condition` +- [14..17]: expecting `meta`, `strings` or `condition`, found `foo` diff --git a/parser-ng/src/parser/tests/testdata/meta-error-2.out b/parser-ng/src/parser/tests/testdata/meta-error-2.out index c504a5b3e..55dec3a66 100644 --- a/parser-ng/src/parser/tests/testdata/meta-error-2.out +++ b/parser-ng/src/parser/tests/testdata/meta-error-2.out @@ -28,4 +28,4 @@ SOURCE_FILE@0..50 R_BRACE@49..50 "}" ERRORS: -- [30..39]: expecting `=` +- [30..39]: expecting `=`, found `condition` diff --git a/parser-ng/src/parser/tests/testdata/meta-error-3.out b/parser-ng/src/parser/tests/testdata/meta-error-3.out index 34697e285..88a966529 100644 --- a/parser-ng/src/parser/tests/testdata/meta-error-3.out +++ b/parser-ng/src/parser/tests/testdata/meta-error-3.out @@ -30,4 +30,4 @@ SOURCE_FILE@0..52 R_BRACE@51..52 "}" ERRORS: -- [32..41]: expecting `true`, `false`, INTEGER, FLOAT or STRING +- [32..41]: expecting `true`, `false`, INTEGER, FLOAT or STRING, found `condition` diff --git a/parser-ng/src/parser/tests/testdata/meta-error-4.out b/parser-ng/src/parser/tests/testdata/meta-error-4.out index 2717d50d3..7cdaa47c0 100644 --- a/parser-ng/src/parser/tests/testdata/meta-error-4.out +++ b/parser-ng/src/parser/tests/testdata/meta-error-4.out @@ -40,4 +40,4 @@ SOURCE_FILE@0..73 R_BRACE@72..73 "}" ERRORS: -- [30..34]: expecting `true`, `false`, INTEGER, FLOAT or STRING +- [30..34]: expecting `true`, `false`, INTEGER, FLOAT or STRING, found `meta` diff --git a/parser-ng/src/parser/tests/testdata/meta-error-5.out b/parser-ng/src/parser/tests/testdata/meta-error-5.out index 2bd69c3c4..86bc7714f 100644 --- a/parser-ng/src/parser/tests/testdata/meta-error-5.out +++ b/parser-ng/src/parser/tests/testdata/meta-error-5.out @@ -53,4 +53,4 @@ 
SOURCE_FILE@0..97 R_BRACE@96..97 "}" ERRORS: -- [30..33]: expecting `true`, `false`, INTEGER, FLOAT or STRING +- [30..33]: expecting `true`, `false`, INTEGER, FLOAT or STRING, found `bar` diff --git a/parser-ng/src/parser/tests/testdata/meta-error-6.out b/parser-ng/src/parser/tests/testdata/meta-error-6.out index 81c1aa3b2..9e65a6533 100644 --- a/parser-ng/src/parser/tests/testdata/meta-error-6.out +++ b/parser-ng/src/parser/tests/testdata/meta-error-6.out @@ -53,5 +53,5 @@ SOURCE_FILE@0..105 R_BRACE@104..105 "}" ERRORS: -- [32..39]: expecting `true`, `false`, INTEGER, FLOAT or STRING -- [44..47]: expecting `:` +- [32..39]: expecting `true`, `false`, INTEGER, FLOAT or STRING, found `strings` +- [44..47]: expecting `:`, found `bar` diff --git a/parser-ng/src/parser/tests/testdata/of-error-1.out b/parser-ng/src/parser/tests/testdata/of-error-1.out index 919d7b381..90a3a3412 100644 --- a/parser-ng/src/parser/tests/testdata/of-error-1.out +++ b/parser-ng/src/parser/tests/testdata/of-error-1.out @@ -7,44 +7,26 @@ SOURCE_FILE@0..47 L_BRACE@10..11 "{" NEWLINE@11..12 "\n" WHITESPACE@12..13 "\t" - CONDITION_BLK@13..40 + ERROR@13..26 CONDITION_KW@13..22 "condition" COLON@22..23 ":" NEWLINE@23..24 "\n" WHITESPACE@24..26 "\t\t" - BOOLEAN_EXPR@26..40 - BOOLEAN_TERM@26..40 - OF_EXPR@26..40 - QUANTIFIER@26..29 - ANY_KW@26..29 "any" - WHITESPACE@29..30 " " - OF_KW@30..32 "of" - WHITESPACE@32..33 " " - BOOLEAN_EXPR_TUPLE@33..40 - L_PAREN@33..34 "(" - BOOLEAN_EXPR@34..35 - BOOLEAN_TERM@34..35 - EXPR@34..35 - TERM@34..35 - PRIMARY_EXPR@34..35 - IDENT@34..35 "a" - COMMA@35..36 "," - BOOLEAN_EXPR@36..37 - BOOLEAN_TERM@36..37 - EXPR@36..37 - TERM@36..37 - PRIMARY_EXPR@36..37 - IDENT@36..37 "b" - COMMA@37..38 "," - BOOLEAN_EXPR@38..39 - BOOLEAN_TERM@38..39 - EXPR@38..39 - TERM@38..39 - PRIMARY_EXPR@38..39 - IDENT@38..39 "c" - R_PAREN@39..40 ")" - WHITESPACE@40..41 " " - ERROR@41..46 + ERROR@26..26 + ERROR@26..26 + ERROR@26..46 + ANY_KW@26..29 "any" + WHITESPACE@29..30 " " + OF_KW@30..32 
"of" + WHITESPACE@32..33 " " + L_PAREN@33..34 "(" + IDENT@34..35 "a" + COMMA@35..36 "," + IDENT@36..37 "b" + COMMA@37..38 "," + IDENT@38..39 "c" + R_PAREN@39..40 ")" + WHITESPACE@40..41 " " AT_KW@41..43 "at" WHITESPACE@43..44 " " INTEGER_LIT@44..45 "0" @@ -52,4 +34,4 @@ SOURCE_FILE@0..47 R_BRACE@46..47 "}" ERRORS: -- [41..43]: expecting `and`, `or` or `}` +- [41..43]: unexpected `at` diff --git a/parser-ng/src/parser/tests/testdata/pattern-mods-error-1.out b/parser-ng/src/parser/tests/testdata/pattern-mods-error-1.out index 4d6ef56c2..c7a45117f 100644 --- a/parser-ng/src/parser/tests/testdata/pattern-mods-error-1.out +++ b/parser-ng/src/parser/tests/testdata/pattern-mods-error-1.out @@ -75,5 +75,5 @@ SOURCE_FILE@0..131 R_BRACE@130..131 "}" ERRORS: -- [36..39]: expecting pattern modifier, pattern identifier or `condition` -- [107..108]: expecting STRING +- [36..39]: expecting pattern modifier, pattern identifier or `condition`, found `foo` +- [107..108]: expecting STRING, found `)` diff --git a/parser-ng/src/parser/tests/testdata/patterns-error-1.out b/parser-ng/src/parser/tests/testdata/patterns-error-1.out index ce64a772f..5016d8a60 100644 --- a/parser-ng/src/parser/tests/testdata/patterns-error-1.out +++ b/parser-ng/src/parser/tests/testdata/patterns-error-1.out @@ -35,4 +35,4 @@ SOURCE_FILE@0..57 R_BRACE@56..57 "}" ERRORS: -- [37..46]: expecting `[`, BYTE, `(` or `}` +- [37..46]: expecting `[`, BYTE, `(` or `}`, found `condition` diff --git a/parser-ng/src/parser/tests/testdata/patterns-error-2.out b/parser-ng/src/parser/tests/testdata/patterns-error-2.out index c9a93841c..c2fdbb50c 100644 --- a/parser-ng/src/parser/tests/testdata/patterns-error-2.out +++ b/parser-ng/src/parser/tests/testdata/patterns-error-2.out @@ -42,4 +42,4 @@ SOURCE_FILE@0..70 R_BRACE@69..70 "}" ERRORS: -- [50..59]: expecting `[`, BYTE, `(` or `}` +- [50..59]: expecting `[`, BYTE, `(` or `}`, found `condition` diff --git a/parser-ng/src/parser/tests/testdata/patterns-error-3.out 
b/parser-ng/src/parser/tests/testdata/patterns-error-3.out index 39230ab21..5e349a991 100644 --- a/parser-ng/src/parser/tests/testdata/patterns-error-3.out +++ b/parser-ng/src/parser/tests/testdata/patterns-error-3.out @@ -47,4 +47,4 @@ SOURCE_FILE@0..89 R_BRACE@88..89 "}" ERRORS: -- [47..50]: expecting STRING, regexp or `{` +- [47..50]: expecting STRING, regexp or `{`, found `bar` diff --git a/parser-ng/src/parser/tests/testdata/rule-tags-error-1.out b/parser-ng/src/parser/tests/testdata/rule-tags-error-1.out index cf56f0e7d..50118a215 100644 --- a/parser-ng/src/parser/tests/testdata/rule-tags-error-1.out +++ b/parser-ng/src/parser/tests/testdata/rule-tags-error-1.out @@ -22,4 +22,4 @@ SOURCE_FILE@0..36 R_BRACE@35..36 "}" ERRORS: -- [12..13]: expecting identifier +- [12..13]: expecting identifier, found `{` diff --git a/parser-ng/src/parser/tests/testdata/rule-tags-error-2.out b/parser-ng/src/parser/tests/testdata/rule-tags-error-2.out index 70df95267..0f734ffa8 100644 --- a/parser-ng/src/parser/tests/testdata/rule-tags-error-2.out +++ b/parser-ng/src/parser/tests/testdata/rule-tags-error-2.out @@ -22,4 +22,4 @@ SOURCE_FILE@0..36 R_BRACE@35..36 "}" ERRORS: -- [10..11]: expecting `:` or `{` +- [10..11]: expecting `:` or `{`, found `=` diff --git a/parser-ng/src/parser/tests/testdata/rule-tags-error-3.out b/parser-ng/src/parser/tests/testdata/rule-tags-error-3.out index ffd6f6421..cae60d728 100644 --- a/parser-ng/src/parser/tests/testdata/rule-tags-error-3.out +++ b/parser-ng/src/parser/tests/testdata/rule-tags-error-3.out @@ -25,4 +25,4 @@ SOURCE_FILE@0..38 R_BRACE@37..38 "}" ERRORS: -- [12..13]: expecting identifier +- [12..13]: expecting identifier, found `=`