Skip to content

Commit

Permalink
Merge pull request #2 from TommYDeeee/parser
Browse files Browse the repository at this point in the history
Parser
  • Loading branch information
TommYDeeee authored Jan 29, 2024
2 parents 4cbe27b + 22eedaf commit 5fc4cff
Show file tree
Hide file tree
Showing 28 changed files with 954 additions and 43 deletions.
425 changes: 424 additions & 1 deletion Cargo.lock

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,7 @@ logos = "0.13.0"
rowan-test = { git = "https://github.com/avast/avast-rowan.git" }
text-size = "1.1.1"
drop_bomb = "0.1.5"

[dev-dependencies]
goldenfile = "1.6.0"
globwalk = "0.9.1"
4 changes: 2 additions & 2 deletions example.yar
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ rule test
$a = "foo"
$b = "bar"
condition:
$a and
$b
$a or
$b and true
}
67 changes: 54 additions & 13 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/// This library is used to create a parser for the YARA language.
/// It should also provide tokens for whitespace,
/// as we want full fidelity and error resilience.
use std::{env::args, fs, path::Path};
use std::{env::args, fs, io::Write, path::Path};

use rowan_test::{GreenNode, NodeOrToken};

Expand Down Expand Up @@ -42,27 +42,68 @@ fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) {
println!();

let indent = 0;
print(indent, syntax_tree.into());
//for child in syntax_tree.children() {
// print!("{:indent$}", "", indent = indent);
// println!("{:?}", child.kind());
// println!("{:?}", child.green().children());
//}
let result = print(indent, syntax_tree.into());

print!("{}", result);

(tree, parser_errors)
}

fn print(indent: usize, element: SyntaxElement) {
let kind: SyntaxKind = element.kind().into();
print!("{:indent$}", "", indent = indent);
fn print(indent: usize, element: SyntaxElement) -> String {
let mut result = String::new();
let kind: SyntaxKind = element.kind();
result.push_str(&format!("{:indent$}", "", indent = indent));
match element {
NodeOrToken::Node(node) => {
println!("- {:?}", kind);
result.push_str(&format!("- {:?}\n", kind));
for child in node.children_with_tokens() {
print(indent + 2, child);
result.push_str(&print(indent + 2, child));
}
}

NodeOrToken::Token(token) => println!("- {:?} {:?}", token.text(), kind),
NodeOrToken::Token(token) => {
result.push_str(&format!("- {:?} {:?}\n", token.text(), kind));
}
}
result
}

/// Golden-file test for the parser.
///
/// For every `tests/*.in` input this parses the file, renders the resulting
/// syntax tree with `print`, and writes the rendering to a goldenfile at the
/// sibling `.out` path. Parse errors are compared against a sibling `.err`
/// file when one exists; otherwise the parse must produce no errors.
///
/// # Panics
///
/// Panics if an input or `.err` file cannot be read, if the goldenfile cannot
/// be created or written, or if the reported errors differ from the expected
/// ones.
#[test]
fn test_parse_text() {
    let mut mint = goldenfile::Mint::new(".");

    for entry in globwalk::glob("tests/*.in").unwrap().flatten() {
        // Path to the `.in` input file.
        let path = entry.into_path();

        let input = fs::read_to_string(&path)
            .unwrap_or_else(|_| panic!("Failed to read input file {:?}", path.display()));

        let (tree, errors) = parse_text(&input);

        // `with_extension` replaces only the final extension, so a single call
        // maps `name.in` to `name.out` and keeps any dots inside `name`.
        let out_path = path.with_extension("out");
        // `tree` is not needed afterwards, so it is moved rather than cloned.
        let syntax_tree = SyntaxNode::new_root(tree);

        let output = print(0, syntax_tree.into());

        let mut output_file = mint.new_goldenfile(out_path).unwrap();

        write!(output_file, "{}", output).unwrap();

        // Check errors
        let err_path = path.with_extension("err");
        if err_path.exists() {
            let expected_errors = fs::read_to_string(&err_path)
                .unwrap_or_else(|_| panic!("Failed to read error file {:?}", err_path.display()));
            let actual_errors = errors
                .iter()
                .map(|error| format!("{:?}", error))
                .collect::<Vec<_>>()
                .join("\n");
            // The joined string has no trailing newline, so ignore trailing
            // whitespace (e.g. an editor-added final newline) in the expected file.
            assert_eq!(actual_errors, expected_errors.trim_end());
        } else {
            assert!(errors.is_empty(), "Unexpected errors: {:?}", errors);
        }
    }
}
28 changes: 17 additions & 11 deletions src/parser/grammar/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,25 +45,34 @@ fn condition(p: &mut Parser) {
m.complete(p, CONDITION);
}

// Recovery set handed to `err_recover` by `strings_body` when a string
// definition does not start with a VARIABLE token.
const VARIABLE_RECOVERY_SET: TokenSet = TokenSet::new(&[VARIABLE]);

pub(super) fn strings_body(p: &mut Parser) {
// add support for meta also
while !p.at(EOF) && !p.at(STRINGS) && !p.at(CONDITION) && !p.at(RBRACE) {
assert!(p.at(VARIABLE));
let m = p.start();
p.bump(VARIABLE);
if p.at(VARIABLE) {
let m = p.start();
p.bump(VARIABLE);
m.complete(p, VARIABLE);
} else {
p.err_recover("expected a variable", VARIABLE_RECOVERY_SET);
}
p.expect(ASSIGN);
// so far only strings are supported, later add match for hex strings and regex
string(p);
m.complete(p, VARIABLE);
m.complete(p, VARIABLE_STMT);
}
}

// do the same for hex and regex strings
// add support for hex and regex strings later on
fn string(p: &mut Parser) {
assert!(p.at(STRING));
let m = p.start();
p.bump(STRING);
// add plain string modifiers
match p.current() {
STRING => p.bump(STRING),
_ => p.err_and_bump("expected a string"),
}
// add string modifiers
m.complete(p, STRING);
}

Expand Down Expand Up @@ -96,10 +105,7 @@ fn current_op(p: &mut Parser) -> (u8, SyntaxKind, Associativity) {
fn expression(p: &mut Parser, m: Option<Marker>, bp: u8) -> Option<CompletedMarker> {
let m = m.unwrap_or_else(|| p.start());
let mut lhs = match lhs(p) {
Some(lhs) => {
let lhs = lhs.extend_to(p, m);
lhs
}
Some(lhs) => lhs.extend_to(p, m),
None => {
m.abandon(p);
return None;
Expand Down
14 changes: 12 additions & 2 deletions src/parser/grammar/expressions/atom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,21 @@ pub(crate) fn literal(p: &mut Parser) -> Option<CompletedMarker> {
Some(m.complete(p, LITERAL))
}

// Recovery set handed to `err_recover` by `atom_expr` when the current token
// does not start a supported expression atom.
const EXPR_RECOVERY_SET: TokenSet = TokenSet::new(&[VARIABLE, TRUE, FALSE, AND, OR, NOT]);

// add support for while/for loops, if/else statements, etc.
pub(super) fn atom_expr(p: &mut Parser) -> Option<CompletedMarker> {
if let Some(m) = literal(p) {
return Some(m);
} else {
todo!("add support for other atoms")
}

// This will be extended to support more expressions later
#[allow(clippy::match_single_binding)]
match p.current() {
_ => {
p.err_recover("expected expression", EXPR_RECOVERY_SET);
#[allow(clippy::needless_return)]
return None;
}
};
}
2 changes: 1 addition & 1 deletion src/parser/grammar/items.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ pub(super) const RULE_RECOVERY_SET: TokenSet = TokenSet::new(
);

pub(super) fn mod_content(p: &mut Parser, stop_on_r_brace: bool) {
while !p.at(EOF) && !(p.at(RBRACE) && stop_on_r_brace) {
while !(p.at(EOF) || p.at(RBRACE) && stop_on_r_brace) {
process_top_level(p, stop_on_r_brace);
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ pub mod syntaxkind;
pub use syntaxkind::SyntaxKind;
mod event;
mod grammar;
#[allow(clippy::module_inception)]
mod parser;
mod token_set;

use grammar::parse_source_file;

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ParseError(pub Box<String>);
pub struct ParseError(pub String);

pub trait TokenSource {
fn current(&self) -> Token;
Expand Down
11 changes: 2 additions & 9 deletions src/parser/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ impl<'t> Parser<'t> {
}

pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
let msg = ParseError(Box::new(message.into()));
let msg = ParseError(message.into());
self.push_event(Event::Error { msg });
}

Expand All @@ -111,15 +111,8 @@ impl<'t> Parser<'t> {
}

pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) {
match self.current() {
LBRACE | RBRACE => {
self.error(message);
return;
}
_ => (),
}

if self.at_ts(recovery) {
println!("recovery: {:?}", self.current());
self.error(message);
return;
}
Expand Down
3 changes: 3 additions & 0 deletions src/parser/syntaxkind.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![allow(clippy::upper_case_acronyms)]

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
#[repr(u16)]
pub enum SyntaxKind {
Expand Down Expand Up @@ -31,6 +33,7 @@ pub enum SyntaxKind {
LITERAL,
EXPRESSION,
EXPRESSION_STMT,
VARIABLE_STMT,
__LAST,
}

Expand Down
2 changes: 1 addition & 1 deletion src/syntax/syntax_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,6 @@ impl SyntaxTreeBuilder {

pub fn error(&mut self, error: parser::ParseError, text_pos: TextSize) {
self.errors
.push(SyntaxError::new_at_offset(*error.0, text_pos))
.push(SyntaxError::new_at_offset(error.0, text_pos))
}
}
4 changes: 2 additions & 2 deletions src/syntax/text_tree_sink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,9 @@ fn n_attached_trivias<'a>(
match kind {
SyntaxKind::RULE | SyntaxKind::BLOCK_EXPR | SyntaxKind::STRINGS | SyntaxKind::CONDITION => {
let mut res = 0;
let mut trivias = trivias.enumerate().peekable();
let trivias = trivias.enumerate().peekable();

while let Some((i, (kind, text))) = trivias.next() {
for (i, (kind, text)) in trivias {
match kind {
SyntaxKind::WHITESPACE if text.contains("\n\n") => {
break;
Expand Down
7 changes: 7 additions & 0 deletions tests/test1.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
rule test
{
strings:
$a = "foo"
condition:
$a
}
33 changes: 33 additions & 0 deletions tests/test1.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
- SOURCE_FILE
- RULE
- "rule" RULE
- " " WHITESPACE
- IDENTIFIER
- "test" IDENTIFIER
- "\n" WHITESPACE
- BLOCK_EXPR
- "{" LBRACE
- "\n\t" WHITESPACE
- STRINGS
- "strings" STRINGS
- ":" COLON
- "\n\t\t" WHITESPACE
- VARIABLE_STMT
- VARIABLE
- "$a" VARIABLE
- " " WHITESPACE
- "=" ASSIGN
- " " WHITESPACE
- STRING
- "\"foo\"" STRING
- "\n\t" WHITESPACE
- CONDITION
- "condition" CONDITION
- ":" COLON
- "\n\t\t" WHITESPACE
- EXPRESSION_STMT
- LITERAL
- "$a" VARIABLE
- "\n" WHITESPACE
- "}" RBRACE
- "\n" WHITESPACE
9 changes: 9 additions & 0 deletions tests/test2.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
rule test
{
strings:
$a = "foo"
$b = "bar"
condition:
$a or
$b
}
48 changes: 48 additions & 0 deletions tests/test2.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
- SOURCE_FILE
- RULE
- "rule" RULE
- " " WHITESPACE
- IDENTIFIER
- "test" IDENTIFIER
- "\n" WHITESPACE
- BLOCK_EXPR
- "{" LBRACE
- "\n\t" WHITESPACE
- STRINGS
- "strings" STRINGS
- ":" COLON
- "\n\t\t" WHITESPACE
- VARIABLE_STMT
- VARIABLE
- "$a" VARIABLE
- " " WHITESPACE
- "=" ASSIGN
- " " WHITESPACE
- STRING
- "\"foo\"" STRING
- "\n\t\t" WHITESPACE
- VARIABLE_STMT
- VARIABLE
- "$b" VARIABLE
- " " WHITESPACE
- "=" ASSIGN
- " " WHITESPACE
- STRING
- "\"bar\"" STRING
- "\n\t" WHITESPACE
- CONDITION
- "condition" CONDITION
- ":" COLON
- "\n\t\t" WHITESPACE
- EXPRESSION_STMT
- EXPRESSION
- LITERAL
- "$a" VARIABLE
- " " WHITESPACE
- "or" OR
- "\n\t\t" WHITESPACE
- LITERAL
- "$b" VARIABLE
- "\n" WHITESPACE
- "}" RBRACE
- "\n" WHITESPACE
15 changes: 15 additions & 0 deletions tests/test3.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
//Global comment

//Rule comment
rule test
{
//Rule block comment

//String comment
strings:
$a = "foo"
$b = "bar"
condition:
$a or
$b and true
}
Loading

0 comments on commit 5fc4cff

Please sign in to comment.