From 2d55a6b771fe675f19cdc4404d7eb439170cfd95 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 2 Jul 2024 16:26:04 +0200 Subject: [PATCH] feat: improve error reporting and recovering --- Cargo.lock | 2 +- parser-ng/src/lib.rs | 7 + parser-ng/src/parser/cst/mod.rs | 2 +- parser-ng/src/parser/cst/syntax_kind.rs | 5 +- parser-ng/src/parser/mod.rs | 146 +++++++++++++++--- parser-ng/src/parser/tests/mod.rs | 6 +- parser-ng/src/parser/tests/testdata/2.out | 24 --- parser-ng/src/parser/tests/testdata/3.out | 29 ---- .../tests/testdata/{1.in => basic-1.in} | 0 .../tests/testdata/{1.out => basic-1.out} | 15 +- .../tests/testdata/{2.in => basic-2.in} | 0 .../src/parser/tests/testdata/basic-2.out | 25 +++ .../tests/testdata/{3.in => basic-3.in} | 0 .../src/parser/tests/testdata/basic-3.out | 30 ++++ .../tests/testdata/rule-tags-error-1.in | 4 + .../tests/testdata/rule-tags-error-1.out | 25 +++ .../tests/testdata/rule-tags-error-2.in | 4 + .../tests/testdata/rule-tags-error-2.out | 25 +++ .../tests/testdata/rule-tags-error-3.in | 4 + .../tests/testdata/rule-tags-error-3.out | 27 ++++ .../src/parser/tests/testdata/rule-tags.in | 4 + .../src/parser/tests/testdata/rule-tags.out | 26 ++++ parser-ng/src/tokenizer/tokens.rs | 32 ++++ 23 files changed, 356 insertions(+), 86 deletions(-) delete mode 100644 parser-ng/src/parser/tests/testdata/2.out delete mode 100644 parser-ng/src/parser/tests/testdata/3.out rename parser-ng/src/parser/tests/testdata/{1.in => basic-1.in} (100%) rename parser-ng/src/parser/tests/testdata/{1.out => basic-1.out} (54%) rename parser-ng/src/parser/tests/testdata/{2.in => basic-2.in} (100%) create mode 100644 parser-ng/src/parser/tests/testdata/basic-2.out rename parser-ng/src/parser/tests/testdata/{3.in => basic-3.in} (100%) create mode 100644 parser-ng/src/parser/tests/testdata/basic-3.out create mode 100644 parser-ng/src/parser/tests/testdata/rule-tags-error-1.in create mode 100644 parser-ng/src/parser/tests/testdata/rule-tags-error-1.out 
create mode 100644 parser-ng/src/parser/tests/testdata/rule-tags-error-2.in create mode 100644 parser-ng/src/parser/tests/testdata/rule-tags-error-2.out create mode 100644 parser-ng/src/parser/tests/testdata/rule-tags-error-3.in create mode 100644 parser-ng/src/parser/tests/testdata/rule-tags-error-3.out create mode 100644 parser-ng/src/parser/tests/testdata/rule-tags.in create mode 100644 parser-ng/src/parser/tests/testdata/rule-tags.out diff --git a/Cargo.lock b/Cargo.lock index adc9afdac..50868710f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5010,7 +5010,7 @@ dependencies = [ [[package]] name = "yara-x-parser-ng" -version = "0.4.0" +version = "0.5.0" dependencies = [ "globwalk", "goldenfile", diff --git a/parser-ng/src/lib.rs b/parser-ng/src/lib.rs index a57121cbf..c0ac2ddd7 100644 --- a/parser-ng/src/lib.rs +++ b/parser-ng/src/lib.rs @@ -21,6 +21,7 @@ Deciding whether to use a CST or AST depends on the kind of problem you want to solve. */ +use std::fmt::{Display, Formatter}; use std::ops::Range; mod parser; @@ -39,6 +40,12 @@ impl From for Span { } } +impl Display for Span { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "[{}..{}]", self.start(), self.end()) + } +} + impl Span { const MAX: usize = u32::MAX as usize; diff --git a/parser-ng/src/parser/cst/mod.rs b/parser-ng/src/parser/cst/mod.rs index 0f36247d2..26e79cfbe 100644 --- a/parser-ng/src/parser/cst/mod.rs +++ b/parser-ng/src/parser/cst/mod.rs @@ -36,7 +36,7 @@ impl Debug for CST { if !self.errors.is_empty() { writeln!(f, "\nERRORS:")?; for (span, err) in &self.errors { - writeln!(f, "- {:?}: {}", span, err)?; + writeln!(f, "- {}: {}", span, err)?; } } Ok(()) diff --git a/parser-ng/src/parser/cst/syntax_kind.rs b/parser-ng/src/parser/cst/syntax_kind.rs index 3e0627776..5d6a3f5de 100644 --- a/parser-ng/src/parser/cst/syntax_kind.rs +++ b/parser-ng/src/parser/cst/syntax_kind.rs @@ -29,18 +29,19 @@ pub enum SyntaxKind { WHITESPACE, NEWLINE, - ERROR, IDENT, IMPORT_STMT, 
RULE_DECL, RULE_MODS, RULE_TAGS, - CONDITION, + CONDITION_BLK, META_DEF, META_DEFS, SOURCE_FILE, BOOLEAN_EXPR, BOOLEAN_TERM, + + ERROR, } impl From for rowan::SyntaxKind { diff --git a/parser-ng/src/parser/mod.rs b/parser-ng/src/parser/mod.rs index c684bb099..ca8b3e7d7 100644 --- a/parser-ng/src/parser/mod.rs +++ b/parser-ng/src/parser/mod.rs @@ -2,14 +2,20 @@ The parser receives a sequence of tokens produced by the [`Tokenizer`], and produces a Concrete Syntax-Tree ([`CST`]), also known as a lossless syntax -tree. +tree. The CST is initially represented as a stream of [events][`Event`], but +this stream is later converted to a tree using the [rowan][2] crate. -Under the hood, the parser uses the [rowan][2] create. +This parser is error-tolerant: it is able to parse YARA code that contains +syntax errors. After each error, the parser recovers and keeps parsing the +remaining code. The resulting CST may contain error nodes containing portions +of the code that are not syntactically correct, but anything outside of those +error nodes is valid YARA code.
[1]: https://en.wikipedia.org/wiki/Parsing_expression_grammar [2]: https://github.com/rust-analyzer/rowan */ +use std::collections::HashMap; use std::mem; pub mod cst; @@ -57,6 +63,9 @@ impl<'src> Parser<'src> { struct InternalParser<'src> { tokens: TokenStream<'src>, output: SyntaxStream, + pending_errors: Vec<(String, Span)>, + expected_tokens: HashMap>, + opt_depth: usize, failed: bool, } @@ -66,6 +75,9 @@ impl<'src> From> for InternalParser<'src> { Self { tokens: TokenStream::new(tokenizer), output: SyntaxStream::new(), + pending_errors: Vec::new(), + expected_tokens: HashMap::new(), + opt_depth: 0, failed: false, } } @@ -89,6 +101,7 @@ impl Iterator for InternalParser<'_> { if self.output.is_empty() && self.tokens.has_more() { let _ = self.ws(); let _ = self.top_level_item(); + self.failed = false; } self.output.pop() } @@ -164,28 +177,85 @@ impl<'src> InternalParser<'src> { self } + fn recover(&mut self, tokens: &TokenSet) -> &mut Self { + match self.peek() { + None => {} + Some(token) if tokens.contains(token) => {} + Some(_) => { + self.output.begin(SyntaxKind::ERROR); + while let Some(token) = self.peek() { + if tokens.contains(token) { + break; + } else { + self.bump(); + } + } + self.output.end(); + } + } + self.failed = false; + self + } + + fn expect_and_recover(&mut self, tokens: &TokenSet) -> &mut Self { + self.expect(tokens); + if self.failed { + self.recover(tokens); + self.bump(); + } + self + } + /// Expects one of the tokens in `expected_tokens`. /// /// If the next token is not one of the expected ones, the parser enters /// the failed state. 
- fn expect(&mut self, expected_tokens: &[Token]) -> &mut Self { + fn expect(&mut self, tokens: &TokenSet) -> &mut Self { let token = match self.peek() { - Some(token) => token, None => { self.failed = true; return self; } + Some(token) if tokens.contains(token) => { + self.bump(); + return self; + } + Some(token) => token, }; - if expected_tokens.iter().any(|expected| { - mem::discriminant(expected) == mem::discriminant(token) - }) { - self.bump(); + + let span = token.span(); + let token_str = token.as_str(); + + let expected_tokens = + self.expected_tokens.entry(span.start()).or_default(); + + expected_tokens.extend(tokens.iter().map(|t| t.as_str())); + + let (last, all_except_last) = expected_tokens.split_last().unwrap(); + + let error_msg = if all_except_last.is_empty() { + format!("expecting {last}, found {}", token_str) } else { - let span = token.span(); - self.bump(); - self.output.push_error("foo", span); - self.failed = true; + format!( + "expecting {} or {last}, found {}", + all_except_last.join(", "), + token_str, + ) + }; + + self.pending_errors.push((error_msg, span)); + + if self.opt_depth == 0 { + if let Some((error, span)) = self + .pending_errors + .drain(0..) + .max_by_key(|(_, span)| span.start()) + { + self.output.push_error(error, span); + } } + + self.failed = true; self } @@ -203,7 +273,10 @@ impl<'src> InternalParser<'src> { } let bookmark = self.bookmark(); + + self.opt_depth += 1; p(self); + self.opt_depth -= 1; // Any error occurred while parsing the optional production is ignored. if self.failed { @@ -253,7 +326,9 @@ impl<'src> InternalParser<'src> { if !self.failed { loop { let bookmark = self.bookmark(); + self.opt_depth += 1; p(self); + self.opt_depth -= 1; if self.failed { self.failed = false; self.restore_bookmark(&bookmark); @@ -300,7 +375,7 @@ use Token::*; macro_rules! 
t { ($( $tokens:path )|*) => { - &[$( $tokens(Span::default()) ),*] + &TokenSet(&[$( $tokens(Span::default()) ),*]) }; } @@ -413,17 +488,16 @@ impl<'src> InternalParser<'src> { .ws() .expect(t!(IDENT)) .ws() - .expect(t!(L_BRACE)) + .opt(|p| p.rule_tags()) + .ws() + .expect_and_recover(t!(L_BRACE)) .ws() .opt(|p| p.meta_defs()) .ws() .opt(|p| p.pattern_defs()) .ws() - .expect(t!(CONDITION_KW)) - .ws() - .expect(t!(COLON)) - .ws() - .one(|p| p.boolean_expr()) + .recover(t!(CONDITION_KW)) + .one(|p| p.condition_blk()) .ws() .expect(t!(R_BRACE)) .end() @@ -447,6 +521,13 @@ impl<'src> InternalParser<'src> { .end() } + fn rule_tags(&mut self) -> &mut Self { + self.begin(SyntaxKind::RULE_TAGS) + .expect(t!(COLON)) + .one_or_more(|p| p.ws().expect(t!(IDENT))) + .end() + } + /// Parses metadata definitions /// /// ```text @@ -500,6 +581,16 @@ impl<'src> InternalParser<'src> { todo!() } + fn condition_blk(&mut self) -> &mut Self { + self.begin(SyntaxKind::CONDITION_BLK) + .expect(t!(CONDITION_KW)) + .ws() + .expect(t!(COLON)) + .ws() + .one(|p| p.boolean_expr()) + .end() + } + fn hex_pattern(&mut self) -> &mut Self { todo!() } @@ -564,6 +655,23 @@ struct Bookmark { output: syntax_stream::Bookmark, } +struct TokenSet<'a>(&'a [Token]); + +impl<'a> TokenSet<'a> { + #[inline] + fn is_empty(&self) -> bool { + self.0.is_empty() + } + + fn contains(&self, token: &Token) -> bool { + self.0.iter().any(|t| mem::discriminant(t) == mem::discriminant(token)) + } + + fn iter(&self) -> impl Iterator { + self.0.iter() + } +} + struct Alt<'a, 'src> { parser: &'a mut InternalParser<'src>, matched: bool, diff --git a/parser-ng/src/parser/tests/mod.rs b/parser-ng/src/parser/tests/mod.rs index 7b44301c3..f51346bc2 100644 --- a/parser-ng/src/parser/tests/mod.rs +++ b/parser-ng/src/parser/tests/mod.rs @@ -10,9 +10,9 @@ use crate::Parser; fn test() { let cst = CST::from(Parser::new( r#" -rule test { - condition: - true and false +rule test : { + condition: + true } "# .as_bytes(), diff --git 
a/parser-ng/src/parser/tests/testdata/2.out b/parser-ng/src/parser/tests/testdata/2.out deleted file mode 100644 index 670757609..000000000 --- a/parser-ng/src/parser/tests/testdata/2.out +++ /dev/null @@ -1,24 +0,0 @@ -SOURCE_FILE@0..43 - RULE_DECL@0..42 - RULE_KW@0..4 "rule" - WHITESPACE@4..5 " " - IDENT@5..9 "test" - WHITESPACE@9..10 " " - L_BRACE@10..11 "{" - NEWLINE@11..12 "\n" - WHITESPACE@12..13 "\t" - CONDITION_KW@13..22 "condition" - COLON@22..23 ":" - NEWLINE@23..24 "\n" - WHITESPACE@24..26 "\t\t" - BOOLEAN_EXPR@26..40 - BOOLEAN_TERM@26..30 - TRUE_KW@26..30 "true" - WHITESPACE@30..31 " " - AND_KW@31..34 "and" - WHITESPACE@34..35 " " - BOOLEAN_TERM@35..40 - FALSE_KW@35..40 "false" - NEWLINE@40..41 "\n" - R_BRACE@41..42 "}" - NEWLINE@42..43 "\n" diff --git a/parser-ng/src/parser/tests/testdata/3.out b/parser-ng/src/parser/tests/testdata/3.out deleted file mode 100644 index 6b55af705..000000000 --- a/parser-ng/src/parser/tests/testdata/3.out +++ /dev/null @@ -1,29 +0,0 @@ -SOURCE_FILE@0..52 - RULE_DECL@0..51 - RULE_KW@0..4 "rule" - WHITESPACE@4..5 " " - IDENT@5..9 "test" - WHITESPACE@9..10 " " - L_BRACE@10..11 "{" - NEWLINE@11..12 "\n" - WHITESPACE@12..13 "\t" - CONDITION_KW@13..22 "condition" - COLON@22..23 ":" - NEWLINE@23..24 "\n" - WHITESPACE@24..26 "\t\t" - BOOLEAN_EXPR@26..49 - BOOLEAN_TERM@26..30 - TRUE_KW@26..30 "true" - WHITESPACE@30..31 " " - AND_KW@31..34 "and" - WHITESPACE@34..35 " " - BOOLEAN_TERM@35..40 - FALSE_KW@35..40 "false" - WHITESPACE@40..41 " " - OR_KW@41..43 "or" - WHITESPACE@43..44 " " - BOOLEAN_TERM@44..49 - FALSE_KW@44..49 "false" - NEWLINE@49..50 "\n" - R_BRACE@50..51 "}" - NEWLINE@51..52 "\n" diff --git a/parser-ng/src/parser/tests/testdata/1.in b/parser-ng/src/parser/tests/testdata/basic-1.in similarity index 100% rename from parser-ng/src/parser/tests/testdata/1.in rename to parser-ng/src/parser/tests/testdata/basic-1.in diff --git a/parser-ng/src/parser/tests/testdata/1.out b/parser-ng/src/parser/tests/testdata/basic-1.out 
similarity index 54% rename from parser-ng/src/parser/tests/testdata/1.out rename to parser-ng/src/parser/tests/testdata/basic-1.out index 7cc0164e3..1054d1c48 100644 --- a/parser-ng/src/parser/tests/testdata/1.out +++ b/parser-ng/src/parser/tests/testdata/basic-1.out @@ -7,13 +7,14 @@ SOURCE_FILE@0..33 L_BRACE@10..11 "{" NEWLINE@11..12 "\n" WHITESPACE@12..13 "\t" - CONDITION_KW@13..22 "condition" - COLON@22..23 ":" - NEWLINE@23..24 "\n" - WHITESPACE@24..26 "\t\t" - BOOLEAN_EXPR@26..30 - BOOLEAN_TERM@26..30 - TRUE_KW@26..30 "true" + CONDITION_BLK@13..30 + CONDITION_KW@13..22 "condition" + COLON@22..23 ":" + NEWLINE@23..24 "\n" + WHITESPACE@24..26 "\t\t" + BOOLEAN_EXPR@26..30 + BOOLEAN_TERM@26..30 + TRUE_KW@26..30 "true" NEWLINE@30..31 "\n" R_BRACE@31..32 "}" NEWLINE@32..33 "\n" diff --git a/parser-ng/src/parser/tests/testdata/2.in b/parser-ng/src/parser/tests/testdata/basic-2.in similarity index 100% rename from parser-ng/src/parser/tests/testdata/2.in rename to parser-ng/src/parser/tests/testdata/basic-2.in diff --git a/parser-ng/src/parser/tests/testdata/basic-2.out b/parser-ng/src/parser/tests/testdata/basic-2.out new file mode 100644 index 000000000..51e24c80a --- /dev/null +++ b/parser-ng/src/parser/tests/testdata/basic-2.out @@ -0,0 +1,25 @@ +SOURCE_FILE@0..43 + RULE_DECL@0..42 + RULE_KW@0..4 "rule" + WHITESPACE@4..5 " " + IDENT@5..9 "test" + WHITESPACE@9..10 " " + L_BRACE@10..11 "{" + NEWLINE@11..12 "\n" + WHITESPACE@12..13 "\t" + CONDITION_BLK@13..40 + CONDITION_KW@13..22 "condition" + COLON@22..23 ":" + NEWLINE@23..24 "\n" + WHITESPACE@24..26 "\t\t" + BOOLEAN_EXPR@26..40 + BOOLEAN_TERM@26..30 + TRUE_KW@26..30 "true" + WHITESPACE@30..31 " " + AND_KW@31..34 "and" + WHITESPACE@34..35 " " + BOOLEAN_TERM@35..40 + FALSE_KW@35..40 "false" + NEWLINE@40..41 "\n" + R_BRACE@41..42 "}" + NEWLINE@42..43 "\n" diff --git a/parser-ng/src/parser/tests/testdata/3.in b/parser-ng/src/parser/tests/testdata/basic-3.in similarity index 100% rename from 
parser-ng/src/parser/tests/testdata/3.in rename to parser-ng/src/parser/tests/testdata/basic-3.in diff --git a/parser-ng/src/parser/tests/testdata/basic-3.out b/parser-ng/src/parser/tests/testdata/basic-3.out new file mode 100644 index 000000000..4c9feacea --- /dev/null +++ b/parser-ng/src/parser/tests/testdata/basic-3.out @@ -0,0 +1,30 @@ +SOURCE_FILE@0..52 + RULE_DECL@0..51 + RULE_KW@0..4 "rule" + WHITESPACE@4..5 " " + IDENT@5..9 "test" + WHITESPACE@9..10 " " + L_BRACE@10..11 "{" + NEWLINE@11..12 "\n" + WHITESPACE@12..13 "\t" + CONDITION_BLK@13..49 + CONDITION_KW@13..22 "condition" + COLON@22..23 ":" + NEWLINE@23..24 "\n" + WHITESPACE@24..26 "\t\t" + BOOLEAN_EXPR@26..49 + BOOLEAN_TERM@26..30 + TRUE_KW@26..30 "true" + WHITESPACE@30..31 " " + AND_KW@31..34 "and" + WHITESPACE@34..35 " " + BOOLEAN_TERM@35..40 + FALSE_KW@35..40 "false" + WHITESPACE@40..41 " " + OR_KW@41..43 "or" + WHITESPACE@43..44 " " + BOOLEAN_TERM@44..49 + FALSE_KW@44..49 "false" + NEWLINE@49..50 "\n" + R_BRACE@50..51 "}" + NEWLINE@51..52 "\n" diff --git a/parser-ng/src/parser/tests/testdata/rule-tags-error-1.in b/parser-ng/src/parser/tests/testdata/rule-tags-error-1.in new file mode 100644 index 000000000..ee0f6642d --- /dev/null +++ b/parser-ng/src/parser/tests/testdata/rule-tags-error-1.in @@ -0,0 +1,4 @@ +rule test : { + condition: + true +} \ No newline at end of file diff --git a/parser-ng/src/parser/tests/testdata/rule-tags-error-1.out b/parser-ng/src/parser/tests/testdata/rule-tags-error-1.out new file mode 100644 index 000000000..1573b3539 --- /dev/null +++ b/parser-ng/src/parser/tests/testdata/rule-tags-error-1.out @@ -0,0 +1,25 @@ +SOURCE_FILE@0..36 + RULE_DECL@0..36 + RULE_KW@0..4 "rule" + WHITESPACE@4..5 " " + IDENT@5..9 "test" + WHITESPACE@9..10 " " + ERROR@10..12 + COLON@10..11 ":" + WHITESPACE@11..12 " " + L_BRACE@12..13 "{" + NEWLINE@13..14 "\n" + WHITESPACE@14..16 " " + CONDITION_BLK@16..34 + CONDITION_KW@16..25 "condition" + COLON@25..26 ":" + NEWLINE@26..27 "\n" + 
WHITESPACE@27..30 "\t " + BOOLEAN_EXPR@30..34 + BOOLEAN_TERM@30..34 + TRUE_KW@30..34 "true" + NEWLINE@34..35 "\n" + R_BRACE@35..36 "}" + +ERRORS: +- [12..13]: expecting IDENT, found `{` diff --git a/parser-ng/src/parser/tests/testdata/rule-tags-error-2.in b/parser-ng/src/parser/tests/testdata/rule-tags-error-2.in new file mode 100644 index 000000000..177840ded --- /dev/null +++ b/parser-ng/src/parser/tests/testdata/rule-tags-error-2.in @@ -0,0 +1,4 @@ +rule test = { + condition: + true +} \ No newline at end of file diff --git a/parser-ng/src/parser/tests/testdata/rule-tags-error-2.out b/parser-ng/src/parser/tests/testdata/rule-tags-error-2.out new file mode 100644 index 000000000..0f734ffa8 --- /dev/null +++ b/parser-ng/src/parser/tests/testdata/rule-tags-error-2.out @@ -0,0 +1,25 @@ +SOURCE_FILE@0..36 + RULE_DECL@0..36 + RULE_KW@0..4 "rule" + WHITESPACE@4..5 " " + IDENT@5..9 "test" + WHITESPACE@9..10 " " + ERROR@10..12 + EQUAL@10..11 "=" + WHITESPACE@11..12 " " + L_BRACE@12..13 "{" + NEWLINE@13..14 "\n" + WHITESPACE@14..16 " " + CONDITION_BLK@16..34 + CONDITION_KW@16..25 "condition" + COLON@25..26 ":" + NEWLINE@26..27 "\n" + WHITESPACE@27..30 "\t " + BOOLEAN_EXPR@30..34 + BOOLEAN_TERM@30..34 + TRUE_KW@30..34 "true" + NEWLINE@34..35 "\n" + R_BRACE@35..36 "}" + +ERRORS: +- [10..11]: expecting `:` or `{`, found `=` diff --git a/parser-ng/src/parser/tests/testdata/rule-tags-error-3.in b/parser-ng/src/parser/tests/testdata/rule-tags-error-3.in new file mode 100644 index 000000000..8c1601051 --- /dev/null +++ b/parser-ng/src/parser/tests/testdata/rule-tags-error-3.in @@ -0,0 +1,4 @@ +rule test : = { + condition: + true +} \ No newline at end of file diff --git a/parser-ng/src/parser/tests/testdata/rule-tags-error-3.out b/parser-ng/src/parser/tests/testdata/rule-tags-error-3.out new file mode 100644 index 000000000..9258208df --- /dev/null +++ b/parser-ng/src/parser/tests/testdata/rule-tags-error-3.out @@ -0,0 +1,27 @@ +SOURCE_FILE@0..38 + RULE_DECL@0..38 + RULE_KW@0..4 
"rule" + WHITESPACE@4..5 " " + IDENT@5..9 "test" + WHITESPACE@9..10 " " + ERROR@10..14 + COLON@10..11 ":" + WHITESPACE@11..12 " " + EQUAL@12..13 "=" + WHITESPACE@13..14 " " + L_BRACE@14..15 "{" + NEWLINE@15..16 "\n" + WHITESPACE@16..18 " " + CONDITION_BLK@18..36 + CONDITION_KW@18..27 "condition" + COLON@27..28 ":" + NEWLINE@28..29 "\n" + WHITESPACE@29..32 "\t " + BOOLEAN_EXPR@32..36 + BOOLEAN_TERM@32..36 + TRUE_KW@32..36 "true" + NEWLINE@36..37 "\n" + R_BRACE@37..38 "}" + +ERRORS: +- [12..13]: expecting IDENT, found `=` diff --git a/parser-ng/src/parser/tests/testdata/rule-tags.in b/parser-ng/src/parser/tests/testdata/rule-tags.in new file mode 100644 index 000000000..fbe99549b --- /dev/null +++ b/parser-ng/src/parser/tests/testdata/rule-tags.in @@ -0,0 +1,4 @@ +rule test : foo bar { + condition: + true +} \ No newline at end of file diff --git a/parser-ng/src/parser/tests/testdata/rule-tags.out b/parser-ng/src/parser/tests/testdata/rule-tags.out new file mode 100644 index 000000000..5bd85af5a --- /dev/null +++ b/parser-ng/src/parser/tests/testdata/rule-tags.out @@ -0,0 +1,26 @@ +SOURCE_FILE@0..44 + RULE_DECL@0..44 + RULE_KW@0..4 "rule" + WHITESPACE@4..5 " " + IDENT@5..9 "test" + WHITESPACE@9..10 " " + RULE_TAGS@10..19 + COLON@10..11 ":" + WHITESPACE@11..12 " " + IDENT@12..15 "foo" + WHITESPACE@15..16 " " + IDENT@16..19 "bar" + WHITESPACE@19..20 " " + L_BRACE@20..21 "{" + NEWLINE@21..22 "\n" + WHITESPACE@22..24 " " + CONDITION_BLK@24..42 + CONDITION_KW@24..33 "condition" + COLON@33..34 ":" + NEWLINE@34..35 "\n" + WHITESPACE@35..38 "\t " + BOOLEAN_EXPR@38..42 + BOOLEAN_TERM@38..42 + TRUE_KW@38..42 "true" + NEWLINE@42..43 "\n" + R_BRACE@43..44 "}" diff --git a/parser-ng/src/tokenizer/tokens.rs b/parser-ng/src/tokenizer/tokens.rs index aa2d0239c..4621f894b 100644 --- a/parser-ng/src/tokenizer/tokens.rs +++ b/parser-ng/src/tokenizer/tokens.rs @@ -89,4 +89,36 @@ impl Token { | Token::INVALID_UTF8(span) => span.clone(), } } + + pub fn as_str(&self) -> &'static str { + 
match self { + Token::AND_KW(_) => "`and`", + Token::CONDITION_KW(_) => "`condition`", + Token::FALSE_KW(_) => "`false`", + Token::GLOBAL_KW(_) => "`global`", + Token::IMPORT_KW(_) => "`import`", + Token::META_KW(_) => "`meta`", + Token::NOT_KW(_) => "`not`", + Token::OR_KW(_) => "`or`", + Token::PRIVATE_KW(_) => "`private`", + Token::RULE_KW(_) => "`rule`", + Token::TRUE_KW(_) => "`true`", + Token::FLOAT_LIT(_) => "FLOAT", + Token::INTEGER_LIT(_) => "INTEGER", + Token::STRING_LIT(_) => "STRING", + Token::IDENT(_) => "IDENT", + Token::COLON(_) => "`:`", + Token::EQUAL(_) => "`=`", + Token::L_BRACE(_) => "`{`", + Token::R_BRACE(_) => "`}`", + Token::L_PAREN(_) => "`(`", + Token::R_PAREN(_) => "`)`", + Token::HEX_BYTE(_) => "BYTE", + Token::COMMENT(_) => "comment", + Token::NEWLINE(_) => "newline", + Token::WHITESPACE(_) => "whitespace", + Token::UNKNOWN(_) => "unknown", + Token::INVALID_UTF8(_) => unreachable!() + } + } }