Skip to content

Commit

Permalink
fix: some issues in the parser.
Browse files Browse the repository at this point in the history
  • Loading branch information
plusvic committed Jul 4, 2024
1 parent 38ef3ea commit 17dc81d
Show file tree
Hide file tree
Showing 11 changed files with 141 additions and 36 deletions.
80 changes: 64 additions & 16 deletions parser-ng/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -291,10 +291,6 @@ impl<'src> InternalParser<'src> {
fn check(&mut self, expected_tokens: &TokenSet) -> &mut Self {
assert!(!expected_tokens.is_empty());

if self.failed {
return self;
}

let token = match self.peek() {
None => {
self.failed = true;
Expand Down Expand Up @@ -327,6 +323,10 @@ impl<'src> InternalParser<'src> {
self.pending_errors.push((error_msg, span));

if self.opt_depth == 0 {
// Find the pending error starting at the largest offset. If several
// errors start at the same offset, the last one is used (this is
// guaranteed by the `max_by_key` function). `self.pending_errors`
// is left empty.
if let Some((error, span)) = self
.pending_errors
.drain(0..)
Expand Down Expand Up @@ -445,6 +445,7 @@ impl<'src> InternalParser<'src> {
// can't fail again at any earlier position.
if self.opt_depth == 0 {
self.expected_tokens.clear();
self.pending_errors.clear();
}
}

Expand Down Expand Up @@ -511,6 +512,51 @@ impl<'src> InternalParser<'src> {
self
}

/// Runs `parser` only when the next token is one of `expected_tokens`.
///
/// This is a look-ahead based alternative to `opt`: the expression
/// `if_found(TOKEN, |p| p.expect(TOKEN))` is logically equivalent to
/// `opt(|p| p.expect(TOKEN))`, but it is cheaper because no backtracking
/// is involved. The closure is invoked only once the next token is known
/// to be `TOKEN`.
///
/// Use it in place of `opt` whenever an optional production can be
/// unequivocally recognized by its first token. For instance, the metadata
/// section of a YARA rule is optional, but it always starts with the
/// `meta` keyword, so instead of:
///
/// `opt(|p| p.meta_blk())`
///
/// we can write:
///
/// `if_found(t!(META_KW), |p| p.meta_blk())`
///
/// Nothing is done if the parser has already failed or if there is no
/// next token. When the next token is not one of `expected_tokens`, the
/// parser is left untouched except that the expected tokens are recorded
/// under the offset where the unexpected token starts.
fn if_found<P>(
    &mut self,
    expected_tokens: &TokenSet,
    parser: P,
) -> &mut Self
where
    P: Fn(&mut Self) -> &mut Self,
{
    // Once the parser is in failed state this operation is a no-op.
    if self.failed {
        return self;
    }
    if let Some(next) = self.peek() {
        if expected_tokens.contains(next) {
            // The look-ahead token announces the optional production.
            parser(self);
        } else {
            // Not the token we were looking for: record what would have
            // been accepted at this offset.
            let offset = next.span().start();
            self.expected_tokens
                .entry(offset)
                .or_default()
                .extend(expected_tokens.iter().map(|t| t.as_str()));
        }
    }
    self
}

/// Applies `parser` zero or more times.
#[inline]
fn zero_or_more<P>(&mut self, parser: P) -> &mut Self
Expand Down Expand Up @@ -572,9 +618,6 @@ impl<'src> InternalParser<'src> {
return self;
}
parser(self);
if self.failed {
self.failed = true;
}
self
}

Expand Down Expand Up @@ -711,13 +754,13 @@ impl<'src> InternalParser<'src> {
.ws()
.expect(t!(IDENT))
.ws()
.opt(|p| p.rule_tags())
.if_found(t!(COLON), |p| p.rule_tags())
.ws()
.expect_and_recover(t!(L_BRACE))
.ws()
.opt(|p| p.meta_blk())
.if_found(t!(META_KW), |p| p.meta_blk())
.ws()
.opt(|p| p.patterns_blk())
.if_found(t!(STRINGS_KW), |p| p.patterns_blk())
.ws()
.check_and_recover(t!(CONDITION_KW))
.one(|p| p.condition_blk())
Expand Down Expand Up @@ -806,6 +849,8 @@ impl<'src> InternalParser<'src> {
.ws()
.expect(t!(COLON))
.one_or_more(|p| p.ws().pattern_def())
//.ws()
//.check_and_recover(t!(CONDITION_KW))
.end()
}

Expand All @@ -825,18 +870,19 @@ impl<'src> InternalParser<'src> {
.expect(t!(EQUAL))
.ws()
.begin_alt()
.alt(|p| {
p.expect(t!(STRING_LIT | REGEXP))
.opt(|p| p.ws().pattern_mods())
})
.alt(|p| p.hex_pattern().opt(|p| p.ws().pattern_mods()))
.alt(|p| p.expect(t!(STRING_LIT)))
.alt(|p| p.expect(t!(REGEXP)))
.alt(|p| p.hex_pattern())
.end_alt()
.opt(|p| p.ws().pattern_mods())
.end()
}

fn pattern_mods(&mut self) -> &mut Self {
// TODO
self.begin(SyntaxKind::PATTERN_MODS).expect(t!(PRIVATE_KW)).end()
self.begin(SyntaxKind::PATTERN_MODS)
.expect(t!(ASCII_KW | WIDE_KW | PRIVATE_KW))
.end()
}

/// Parses the condition block.
Expand Down Expand Up @@ -959,7 +1005,9 @@ impl<'a, 'src> Alt<'a, 'src> {
// Don't try to match the current alternative if a previous one already
// matched.
if !self.matched {
self.parser.opt_depth += 1;
self.parser = f(self.parser);
self.parser.opt_depth -= 1;
match self.parser.failed {
// The current alternative matched.
false => {
Expand Down
2 changes: 1 addition & 1 deletion parser-ng/src/parser/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ fn test() {
r#"
rule test {
strings:
$c = { ab
$c = { ab }
condition:
true
}
Expand Down
2 changes: 1 addition & 1 deletion parser-ng/src/parser/tests/testdata/meta-errors-1.out
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ [email protected]
[email protected] "}"

ERRORS:
- [14..17]: expecting `meta`, `strings` or `condition`, found IDENT
- [14..17]: expecting `meta`, `strings` or `condition`, found identifier
7 changes: 4 additions & 3 deletions parser-ng/src/parser/tests/testdata/meta-errors-2.out
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ [email protected]
[email protected] ":"
[email protected] "\n"
[email protected] " "
[email protected] "foo"
[email protected] "\n"
[email protected] " "
[email protected]
[email protected] "foo"
[email protected] "\n"
[email protected] " "
[email protected]
[email protected] "condition"
[email protected] ":"
Expand Down
11 changes: 6 additions & 5 deletions parser-ng/src/parser/tests/testdata/meta-errors-3.out
12 changes: 7 additions & 5 deletions parser-ng/src/parser/tests/testdata/patterns-error-1.out
7 changes: 7 additions & 0 deletions parser-ng/src/parser/tests/testdata/patterns-error-2.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
rule test {
strings:
$b = "foo"
$a = { ab
condition:
true
}
2 changes: 1 addition & 1 deletion parser-ng/src/parser/tests/testdata/rule-tags-error-1.out
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ [email protected]
[email protected] "}"

ERRORS:
- [12..13]: expecting IDENT, found `{`
- [12..13]: expecting identifier, found `{`
5 changes: 3 additions & 2 deletions parser-ng/src/parser/tests/testdata/rule-tags-error-3.out
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ [email protected]
[email protected] " "
[email protected] "test"
[email protected] " "
ERROR@10..14
ERROR@10..12
[email protected] ":"
[email protected] " "
[email protected]
[email protected] "="
[email protected] " "
[email protected] "{"
Expand All @@ -24,4 +25,4 @@ [email protected]
[email protected] "}"

ERRORS:
- [12..13]: expecting IDENT, found `=`
- [12..13]: expecting identifier, found `=`
4 changes: 2 additions & 2 deletions parser-ng/src/tokenizer/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,8 @@ impl Token {
Token::FLOAT_LIT(_) => "FLOAT",
Token::INTEGER_LIT(_) => "INTEGER",
Token::STRING_LIT(_) => "STRING",
Token::IDENT(_) => "IDENT",
Token::PATTERN_IDENT(_) => "IDENT",
Token::IDENT(_) => "identifier",
Token::PATTERN_IDENT(_) => "pattern identifier",
Token::HEX_BYTE(_) => "BYTE",
Token::COMMENT(_) => "comment",
Token::NEWLINE(_) => "newline",
Expand Down

0 comments on commit 17dc81d

Please sign in to comment.