diff --git a/parser-ng/src/parser/mod.rs b/parser-ng/src/parser/mod.rs index 024b0e2e9..a6967bfeb 100644 --- a/parser-ng/src/parser/mod.rs +++ b/parser-ng/src/parser/mod.rs @@ -81,38 +81,8 @@ struct InternalParser<'src> { /// and the "zero or more" operation (examples: `(A|B)`, `A*`). opt_depth: usize, - /// Errors found during parsing that haven't been sent to `ready_errors` - /// yet. - /// - /// When the parser expects a token, and that tokens is not the next one - /// in input, it produces an error like `expecting "foo", found "bar"`. - /// However, these errors are not sent immediately to `ready_errors` - /// stream because some the errors may occur while parsing optional code, - /// or while parsing some branch in an alternation. For instance, in the - /// grammar rule `A := (B | C)`, if the parser finds an error while parsing - /// `B`, but `C` succeeds, then `A` is successful and the error found while - /// parsing `B` is not reported. - /// - /// In the other hand, if both `B` and `C` produce errors, then `A` has - /// failed, but only one of the two errors is reported. The error that - /// gets reported is the one that advanced more in the source code (i.e: - /// the one with the largest span start). This approach tends to produce - /// more meaningful errors. - /// - /// The items in the vector are error messages accompanied by the span in - /// the source code where the error occurred. - pending_errors: Vec<(String, Span)>, - - /// Errors go from `pending_errors` to `ready_errors` before they are - /// finally pushed to the `output` stream. This extra step has the purpose - /// of removing duplicate messages for the same code span. In certain cases - /// the parser can produce two different error messages for the same span, - /// but this map guarantees that only the first error is taken into account - /// and that any further error for the same span is ignored. 
- ready_errors: IndexMap, - - /// Hash map where keys are positions within the source code, and values - /// are a list of tokens that were expected to match at that position. + /// Hash map where keys are spans within the source code, and values + /// are a list of tokens that were expected to match at that span. /// /// This hash map plays a crucial role in error reporting during parsing. /// Consider the following grammar rule: @@ -139,14 +109,22 @@ struct InternalParser<'src> { /// that both `a` and `b` are valid tokens at the position where `c` was /// found? /// - /// This is where the `expected_tokens` hash map comes into play. We know - /// that `a` is also a valid alternative because the `expect(a)` inside the - /// `opt` was tried and failed. The parser doesn't fail at that point - /// because `a` is optional, but it records that `a` was expected at the - /// position of `c`. When `expect(b)` fails later, the parser looks up + /// This is where the `expected_token_errors` hash map comes into play. We + /// know that `a` is also a valid alternative because the `expect(a)` + /// inside the `opt` was tried and failed. The parser doesn't fail at that + /// point because `a` is optional, but it records that `a` was expected at + /// the position of `c`. When `expect(b)` fails later, the parser looks up /// any other token (besides `b`) that were expected to match at the /// position and produces a comprehensive error message. - expected_tokens: HashMap>, + expected_token_errors: HashMap>, + + /// Errors that are not yet sent to the `output` stream. The purpose of + /// this map is removing duplicate messages for the same code span. In + /// certain cases the parser can produce two different error messages for + /// the same span, but this map guarantees that only the first error is + /// taken into account and that any further error for the same span is + /// ignored. 
+ pending_errors: IndexMap, } impl<'src> From> for InternalParser<'src> { @@ -155,9 +133,8 @@ impl<'src> From> for InternalParser<'src> { Self { tokens: TokenStream::new(tokenizer), output: SyntaxStream::new(), - pending_errors: Vec::new(), - ready_errors: IndexMap::new(), - expected_tokens: HashMap::new(), + pending_errors: IndexMap::new(), + expected_token_errors: HashMap::new(), opt_depth: 0, failed: false, } @@ -323,7 +300,7 @@ impl<'src> InternalParser<'src> { self } - fn sync(&mut self, recovery_set: &TokenSet) -> &mut Self { + fn sync(&mut self, recovery_set: &'static TokenSet) -> &mut Self { self.trivia(); match self.peek() { None => return self, @@ -332,13 +309,7 @@ impl<'src> InternalParser<'src> { return self; } else { let span = token.span(); - let token_str = token.description(); - self.unexpected_token_error( - token_str, - span, - recovery_set, - None, - ); + self.unexpected_token_error(span, recovery_set, None); } } } @@ -419,7 +390,10 @@ impl<'src> InternalParser<'src> { /// ``` /// /// Notice how the error is now more localized. - fn recover_and_sync(&mut self, recovery_set: &TokenSet) -> &mut Self { + fn recover_and_sync( + &mut self, + recovery_set: &'static TokenSet, + ) -> &mut Self { self.recover(); /*if let Some(t) = self.peek_non_ws() { if recovery_set.contains(t) { @@ -460,7 +434,7 @@ impl<'src> InternalParser<'src> { /// # Panics /// /// If `expected_tokens` is empty. - fn expect(&mut self, expected_tokens: &TokenSet) -> &mut Self { + fn expect(&mut self, expected_tokens: &'static TokenSet) -> &mut Self { self.expect_d(expected_tokens, None) } @@ -468,7 +442,7 @@ impl<'src> InternalParser<'src> { /// description for the expected tokens. 
fn expect_d( &mut self, - expected_tokens: &TokenSet, + expected_tokens: &'static TokenSet, description: Option<&'static str>, ) -> &mut Self { assert!(!expected_tokens.is_empty()); @@ -483,9 +457,7 @@ impl<'src> InternalParser<'src> { let t = expected_tokens.contains(token); if t.is_none() { let span = token.span(); - let token_str = token.description(); self.unexpected_token_error( - token_str, span, expected_tokens, description, @@ -567,7 +539,7 @@ impl<'src> InternalParser<'src> { } /// Like [`InternalParser::expect`], but optional. - fn opt_expect(&mut self, expected_tokens: &TokenSet) -> &mut Self { + fn opt_expect(&mut self, expected_tokens: &'static TokenSet) -> &mut Self { self.opt(|p| p.expect(expected_tokens)) } @@ -593,7 +565,7 @@ impl<'src> InternalParser<'src> { /// fn if_found

( &mut self, - expected_tokens: &TokenSet, + expected_tokens: &'static TokenSet, parser: P, ) -> &mut Self where @@ -610,11 +582,14 @@ impl<'src> InternalParser<'src> { parser(self); } else { let span = token.span(); - let tokens = - self.expected_tokens.entry(span.start()).or_default(); - tokens.extend( - expected_tokens.token_ids().map(|t| t.description()), - ); + self.expected_token_errors + .entry(span) + .or_default() + .extend( + expected_tokens + .token_ids() + .map(|t| t.description()), + ); } } } @@ -689,59 +664,56 @@ impl<'src> InternalParser<'src> { } fn flush_errors(&mut self) { - self.expected_tokens.clear(); - self.pending_errors.clear(); - for (span, error) in self.ready_errors.drain(0..) { + self.expected_token_errors.clear(); + for (span, error) in self.pending_errors.drain(0..) { self.output.push_error(error, span); } } fn unexpected_token_error( &mut self, - token_str: &str, span: Span, - expected_tokens: &TokenSet, + expected_tokens: &'static TokenSet, description: Option<&'static str>, ) { - let tokens = self.expected_tokens.entry(span.start()).or_default(); + let tokens = + self.expected_token_errors.entry(span.clone()).or_default(); if let Some(description) = description { tokens.insert(description); } else { - tokens - .extend(expected_tokens.token_ids().map(|t| t.description())); + tokens.extend( + expected_tokens.token_ids().map(|token| token.description()), + ); } - let (last, all_except_last) = tokens.as_slice().split_last().unwrap(); - - let error_msg = if all_except_last.is_empty() { - format!("expecting {last}, found {}", token_str) - } else { - format!( - "expecting {} or {last}, found {}", - itertools::join(all_except_last.iter(), ", "), - token_str, - ) - }; - - self.pending_errors.push((error_msg, span)); - if self.opt_depth == 0 { - // Find the pending error starting at the largest offset. If several - // errors start at the same offset, the last one is used (this is - // guaranteed by the `max_by_key` function). 
`self.pending_errors` - // is left empty. - if let Some((error, span)) = self - .pending_errors - .drain(0..) - .max_by_key(|(_, span)| span.start()) + // From all the unexpected token errors, use the one at the largest + // offset. If several errors start at the same offset, the last one + // is used. `self.expected_token_errors` is left empty. + if let Some((span, tokens)) = self + .expected_token_errors + .drain() + .max_by_key(|(span, _)| span.start()) { - match self.ready_errors.entry(span) { + match self.pending_errors.entry(span) { Entry::Occupied(_) => { // already present, don't replace. } - Entry::Vacant(v) => { - v.insert(error); + Entry::Vacant(entry) => { + let (last, all_except_last) = + tokens.as_slice().split_last().unwrap(); + + let error_msg = if all_except_last.is_empty() { + format!("expecting {last}") + } else { + format!( + "expecting {} or {last}", + itertools::join(all_except_last.iter(), ", "), + ) + }; + + entry.insert(error_msg); } } } diff --git a/parser-ng/src/parser/tests/testdata/basic-error-1.out b/parser-ng/src/parser/tests/testdata/basic-error-1.out index 134b3f9cd..62c4221fa 100644 --- a/parser-ng/src/parser/tests/testdata/basic-error-1.out +++ b/parser-ng/src/parser/tests/testdata/basic-error-1.out @@ -34,4 +34,4 @@ SOURCE_FILE@0..58 R_BRACE@57..58 "}" ERRORS: -- [11..12]: expecting `meta`, `strings` or `condition`, found `{` +- [11..12]: expecting `meta`, `strings` or `condition` diff --git a/parser-ng/src/parser/tests/testdata/hex-patterns-error-1.out b/parser-ng/src/parser/tests/testdata/hex-patterns-error-1.out index 0755f1b08..e41f2cd03 100644 --- a/parser-ng/src/parser/tests/testdata/hex-patterns-error-1.out +++ b/parser-ng/src/parser/tests/testdata/hex-patterns-error-1.out @@ -79,5 +79,5 @@ SOURCE_FILE@0..124 R_BRACE@123..124 "}" ERRORS: -- [40..41]: expecting BYTE or `(`, found `)` -- [100..101]: expecting BYTE or `(`, found `}` +- [40..41]: expecting BYTE or `(` +- [100..101]: expecting BYTE or `(` diff --git 
a/parser-ng/src/parser/tests/testdata/hex-patterns-error-2.out b/parser-ng/src/parser/tests/testdata/hex-patterns-error-2.out index 22b535107..ccd23f054 100644 --- a/parser-ng/src/parser/tests/testdata/hex-patterns-error-2.out +++ b/parser-ng/src/parser/tests/testdata/hex-patterns-error-2.out @@ -73,5 +73,5 @@ SOURCE_FILE@0..118 R_BRACE@117..118 "}" ERRORS: -- [33..34]: expecting `[`, BYTE, `(` or `}`, found INTEGER -- [93..94]: expecting `[`, BYTE, `(` or `}`, found identifier +- [33..34]: expecting `[`, BYTE, `(` or `}` +- [93..94]: expecting `[`, BYTE, `(` or `}` diff --git a/parser-ng/src/parser/tests/testdata/meta-error-1.out b/parser-ng/src/parser/tests/testdata/meta-error-1.out index 443404c86..ecb74f652 100644 --- a/parser-ng/src/parser/tests/testdata/meta-error-1.out +++ b/parser-ng/src/parser/tests/testdata/meta-error-1.out @@ -23,4 +23,4 @@ SOURCE_FILE@0..40 R_BRACE@39..40 "}" ERRORS: -- [14..17]: expecting `meta`, `strings` or `condition`, found identifier +- [14..17]: expecting `meta`, `strings` or `condition` diff --git a/parser-ng/src/parser/tests/testdata/meta-error-2.out b/parser-ng/src/parser/tests/testdata/meta-error-2.out index 55dec3a66..c504a5b3e 100644 --- a/parser-ng/src/parser/tests/testdata/meta-error-2.out +++ b/parser-ng/src/parser/tests/testdata/meta-error-2.out @@ -28,4 +28,4 @@ SOURCE_FILE@0..50 R_BRACE@49..50 "}" ERRORS: -- [30..39]: expecting `=`, found `condition` +- [30..39]: expecting `=` diff --git a/parser-ng/src/parser/tests/testdata/meta-error-3.out b/parser-ng/src/parser/tests/testdata/meta-error-3.out index 88a966529..34697e285 100644 --- a/parser-ng/src/parser/tests/testdata/meta-error-3.out +++ b/parser-ng/src/parser/tests/testdata/meta-error-3.out @@ -30,4 +30,4 @@ SOURCE_FILE@0..52 R_BRACE@51..52 "}" ERRORS: -- [32..41]: expecting `true`, `false`, INTEGER, FLOAT or STRING, found `condition` +- [32..41]: expecting `true`, `false`, INTEGER, FLOAT or STRING diff --git a/parser-ng/src/parser/tests/testdata/meta-error-4.out 
b/parser-ng/src/parser/tests/testdata/meta-error-4.out index 7cdaa47c0..2717d50d3 100644 --- a/parser-ng/src/parser/tests/testdata/meta-error-4.out +++ b/parser-ng/src/parser/tests/testdata/meta-error-4.out @@ -40,4 +40,4 @@ SOURCE_FILE@0..73 R_BRACE@72..73 "}" ERRORS: -- [30..34]: expecting `true`, `false`, INTEGER, FLOAT or STRING, found `meta` +- [30..34]: expecting `true`, `false`, INTEGER, FLOAT or STRING diff --git a/parser-ng/src/parser/tests/testdata/meta-error-5.out b/parser-ng/src/parser/tests/testdata/meta-error-5.out index cca29181d..2bd69c3c4 100644 --- a/parser-ng/src/parser/tests/testdata/meta-error-5.out +++ b/parser-ng/src/parser/tests/testdata/meta-error-5.out @@ -53,4 +53,4 @@ SOURCE_FILE@0..97 R_BRACE@96..97 "}" ERRORS: -- [30..33]: expecting `true`, `false`, INTEGER, FLOAT or STRING, found identifier +- [30..33]: expecting `true`, `false`, INTEGER, FLOAT or STRING diff --git a/parser-ng/src/parser/tests/testdata/meta-error-6.out b/parser-ng/src/parser/tests/testdata/meta-error-6.out index 9fae34403..81c1aa3b2 100644 --- a/parser-ng/src/parser/tests/testdata/meta-error-6.out +++ b/parser-ng/src/parser/tests/testdata/meta-error-6.out @@ -53,5 +53,5 @@ SOURCE_FILE@0..105 R_BRACE@104..105 "}" ERRORS: -- [32..39]: expecting `true`, `false`, INTEGER, FLOAT or STRING, found `strings` -- [44..47]: expecting `:`, found identifier +- [32..39]: expecting `true`, `false`, INTEGER, FLOAT or STRING +- [44..47]: expecting `:` diff --git a/parser-ng/src/parser/tests/testdata/pattern-mods-error-1.out b/parser-ng/src/parser/tests/testdata/pattern-mods-error-1.out index 00f14fc6d..4d6ef56c2 100644 --- a/parser-ng/src/parser/tests/testdata/pattern-mods-error-1.out +++ b/parser-ng/src/parser/tests/testdata/pattern-mods-error-1.out @@ -75,5 +75,5 @@ SOURCE_FILE@0..131 R_BRACE@130..131 "}" ERRORS: -- [36..39]: expecting pattern modifier, pattern identifier or `condition`, found identifier -- [107..108]: expecting STRING, found `)` +- [36..39]: expecting pattern 
modifier, pattern identifier or `condition` +- [107..108]: expecting STRING diff --git a/parser-ng/src/parser/tests/testdata/patterns-error-1.out b/parser-ng/src/parser/tests/testdata/patterns-error-1.out index 5016d8a60..ce64a772f 100644 --- a/parser-ng/src/parser/tests/testdata/patterns-error-1.out +++ b/parser-ng/src/parser/tests/testdata/patterns-error-1.out @@ -35,4 +35,4 @@ SOURCE_FILE@0..57 R_BRACE@56..57 "}" ERRORS: -- [37..46]: expecting `[`, BYTE, `(` or `}`, found `condition` +- [37..46]: expecting `[`, BYTE, `(` or `}` diff --git a/parser-ng/src/parser/tests/testdata/patterns-error-2.out b/parser-ng/src/parser/tests/testdata/patterns-error-2.out index c2fdbb50c..c9a93841c 100644 --- a/parser-ng/src/parser/tests/testdata/patterns-error-2.out +++ b/parser-ng/src/parser/tests/testdata/patterns-error-2.out @@ -42,4 +42,4 @@ SOURCE_FILE@0..70 R_BRACE@69..70 "}" ERRORS: -- [50..59]: expecting `[`, BYTE, `(` or `}`, found `condition` +- [50..59]: expecting `[`, BYTE, `(` or `}` diff --git a/parser-ng/src/parser/tests/testdata/patterns-error-3.out b/parser-ng/src/parser/tests/testdata/patterns-error-3.out index e115de155..39230ab21 100644 --- a/parser-ng/src/parser/tests/testdata/patterns-error-3.out +++ b/parser-ng/src/parser/tests/testdata/patterns-error-3.out @@ -47,4 +47,4 @@ SOURCE_FILE@0..89 R_BRACE@88..89 "}" ERRORS: -- [47..50]: expecting STRING, regexp or `{`, found identifier +- [47..50]: expecting STRING, regexp or `{` diff --git a/parser-ng/src/parser/tests/testdata/rule-tags-error-1.out b/parser-ng/src/parser/tests/testdata/rule-tags-error-1.out index 50118a215..cf56f0e7d 100644 --- a/parser-ng/src/parser/tests/testdata/rule-tags-error-1.out +++ b/parser-ng/src/parser/tests/testdata/rule-tags-error-1.out @@ -22,4 +22,4 @@ SOURCE_FILE@0..36 R_BRACE@35..36 "}" ERRORS: -- [12..13]: expecting identifier, found `{` +- [12..13]: expecting identifier diff --git a/parser-ng/src/parser/tests/testdata/rule-tags-error-2.out 
b/parser-ng/src/parser/tests/testdata/rule-tags-error-2.out index 0f734ffa8..70df95267 100644 --- a/parser-ng/src/parser/tests/testdata/rule-tags-error-2.out +++ b/parser-ng/src/parser/tests/testdata/rule-tags-error-2.out @@ -22,4 +22,4 @@ SOURCE_FILE@0..36 R_BRACE@35..36 "}" ERRORS: -- [10..11]: expecting `:` or `{`, found `=` +- [10..11]: expecting `:` or `{` diff --git a/parser-ng/src/parser/tests/testdata/rule-tags-error-3.out b/parser-ng/src/parser/tests/testdata/rule-tags-error-3.out index cae60d728..ffd6f6421 100644 --- a/parser-ng/src/parser/tests/testdata/rule-tags-error-3.out +++ b/parser-ng/src/parser/tests/testdata/rule-tags-error-3.out @@ -25,4 +25,4 @@ SOURCE_FILE@0..38 R_BRACE@37..38 "}" ERRORS: -- [12..13]: expecting identifier, found `=` +- [12..13]: expecting identifier