From c825a3694f38b87338248d7168bd402040bb95d7 Mon Sep 17 00:00:00 2001 From: Jason Lee Date: Thu, 5 Jan 2023 00:27:05 +0800 Subject: [PATCH] Add support for loading multiple grammars in the derive generator. Resolve #197 Example: ```rust #[derive(Parser)] #[grammar = "base.pest"] #[grammar = "json.pest"] pub struct JSONParser; ``` This allows sharing rules between grammars. --- derive/examples/base.pest | 2 + derive/examples/calc.pest | 5 +- derive/examples/calc.rs | 1 + generator/src/lib.rs | 122 ++++++++++++++++++++++++-------------- 4 files changed, 81 insertions(+), 49 deletions(-) create mode 100644 derive/examples/base.pest diff --git a/derive/examples/base.pest b/derive/examples/base.pest new file mode 100644 index 00000000..c63d48af --- /dev/null +++ b/derive/examples/base.pest @@ -0,0 +1,2 @@ +WHITESPACE = _{ " " | "\t" | NEWLINE } +int = @{ (ASCII_NONZERO_DIGIT ~ ASCII_DIGIT+ | ASCII_DIGIT) } \ No newline at end of file diff --git a/derive/examples/calc.pest b/derive/examples/calc.pest index 9f2cc3b7..38349739 100644 --- a/derive/examples/calc.pest +++ b/derive/examples/calc.pest @@ -1,5 +1,3 @@ -WHITESPACE = _{ " " | "\t" | NEWLINE } - program = { SOI ~ expr ~ EOI } expr = { prefix* ~ primary ~ postfix* ~ (infix ~ prefix* ~ primary ~ postfix* )* } infix = _{ add | sub | mul | div | pow } @@ -12,5 +10,4 @@ WHITESPACE = _{ " " | "\t" | NEWLINE } neg = { "-" } // Negation postfix = _{ fac } fac = { "!" 
} // Factorial - primary = _{ int | "(" ~ expr ~ ")" } - int = @{ (ASCII_NONZERO_DIGIT ~ ASCII_DIGIT+ | ASCII_DIGIT) } \ No newline at end of file + primary = _{ int | "(" ~ expr ~ ")" } \ No newline at end of file diff --git a/derive/examples/calc.rs b/derive/examples/calc.rs index efc6b7b2..70716253 100644 --- a/derive/examples/calc.rs +++ b/derive/examples/calc.rs @@ -2,6 +2,7 @@ mod parser { use pest_derive::Parser; #[derive(Parser)] + #[grammar = "../examples/base.pest"] #[grammar = "../examples/calc.pest"] pub struct Parser; } diff --git a/generator/src/lib.rs b/generator/src/lib.rs index 2a1203e4..938bd168 100644 --- a/generator/src/lib.rs +++ b/generator/src/lib.rs @@ -41,39 +41,49 @@ use pest_meta::{optimizer, unwrap_or_report, validator}; /// "include_str" statement (done in pest_derive, but turned off in the local bootstrap). pub fn derive_parser(input: TokenStream, include_grammar: bool) -> TokenStream { let ast: DeriveInput = syn::parse2(input).unwrap(); - let (name, generics, content) = parse_derive(ast); - - let (data, path) = match content { - GrammarSource::File(ref path) => { - let root = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".into()); - - // Check whether we can find a file at the path relative to the CARGO_MANIFEST_DIR - // first. - // - // If we cannot find the expected file over there, fallback to the - // `CARGO_MANIFEST_DIR/src`, which is the old default and kept for convenience - // reasons. - // TODO: This could be refactored once `std::path::absolute()` get's stabilized. 
- // https://doc.rust-lang.org/std/path/fn.absolute.html - let path = if Path::new(&root).join(path).exists() { - Path::new(&root).join(path) - } else { - Path::new(&root).join("src/").join(path) - }; - - let file_name = match path.file_name() { - Some(file_name) => file_name, - None => panic!("grammar attribute should point to a file"), - }; - - let data = match read_file(&path) { - Ok(data) => data, - Err(error) => panic!("error opening {:?}: {}", file_name, error), - }; - (data, Some(path.clone())) + let (name, generics, contents) = parse_derive(ast); + + let mut data = String::new(); + let mut path = None; + + for content in contents { + let (_data, _path) = match content { + GrammarSource::File(ref path) => { + let root = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".into()); + + // Check whether we can find a file at the path relative to the CARGO_MANIFEST_DIR + // first. + // + // If we cannot find the expected file over there, fallback to the + // `CARGO_MANIFEST_DIR/src`, which is the old default and kept for convenience + // reasons. + // TODO: This could be refactored once `std::path::absolute()` get's stabilized. 
+ // https://doc.rust-lang.org/std/path/fn.absolute.html + let path = if Path::new(&root).join(path).exists() { + Path::new(&root).join(path) + } else { + Path::new(&root).join("src/").join(path) + }; + + let file_name = match path.file_name() { + Some(file_name) => file_name, + None => panic!("grammar attribute should point to a file"), + }; + + let data = match read_file(&path) { + Ok(data) => data, + Err(error) => panic!("error opening {:?}: {}", file_name, error), + }; + (data, Some(path.clone())) + } + GrammarSource::Inline(content) => (content, None), + }; + + data.push_str(&_data); + if _path.is_some() { + path = _path; } - GrammarSource::Inline(content) => (content, None), - }; + } let pairs = match parser::parse(Rule::grammar_rules, &data) { Ok(pairs) => pairs, @@ -100,7 +110,7 @@ enum GrammarSource { Inline(String), } -fn parse_derive(ast: DeriveInput) -> (Ident, Generics, GrammarSource) { +fn parse_derive(ast: DeriveInput) -> (Ident, Generics, Vec) { let name = ast.ident; let generics = ast.generics; @@ -115,13 +125,16 @@ fn parse_derive(ast: DeriveInput) -> (Ident, Generics, GrammarSource) { }) .collect(); - let argument = match grammar.len() { - 0 => panic!("a grammar file needs to be provided with the #[grammar = \"PATH\"] or #[grammar_inline = \"GRAMMAR CONTENTS\"] attribute"), - 1 => get_attribute(grammar[0]), - _ => panic!("only 1 grammar file can be provided"), - }; + if grammar.is_empty() { + panic!("a grammar file needs to be provided with the #[grammar = \"PATH\"] or #[grammar_inline = \"GRAMMAR CONTENTS\"] attribute"); + } - (name, generics, argument) + let mut grammar_sources = Vec::with_capacity(grammar.len()); + for attr in grammar { + grammar_sources.push(get_attribute(attr)) + } + + (name, generics, grammar_sources) } fn get_attribute(attr: &Attribute) -> GrammarSource { @@ -153,8 +166,8 @@ mod tests { pub struct MyParser<'a, T>; "; let ast = syn::parse_str(definition).unwrap(); - let (_, _, filename) = parse_derive(ast); - 
assert_eq!(filename, GrammarSource::Inline("GRAMMAR".to_string())); + let (_, _, filenames) = parse_derive(ast); + assert_eq!(filenames, [GrammarSource::Inline("GRAMMAR".to_string())]); } #[test] @@ -165,12 +178,11 @@ mod tests { pub struct MyParser<'a, T>; "; let ast = syn::parse_str(definition).unwrap(); - let (_, _, filename) = parse_derive(ast); - assert_eq!(filename, GrammarSource::File("myfile.pest".to_string())); + let (_, _, filenames) = parse_derive(ast); + assert_eq!(filenames, [GrammarSource::File("myfile.pest".to_string())]); } #[test] - #[should_panic(expected = "only 1 grammar file can be provided")] fn derive_multiple_grammars() { let definition = " #[other_attr] @@ -179,7 +191,14 @@ mod tests { pub struct MyParser<'a, T>; "; let ast = syn::parse_str(definition).unwrap(); - parse_derive(ast); + let (_, _, filenames) = parse_derive(ast); + assert_eq!( + filenames, + [ + GrammarSource::File("myfile1.pest".to_string()), + GrammarSource::File("myfile2.pest".to_string()) + ] + ); } #[test] @@ -193,4 +212,17 @@ mod tests { let ast = syn::parse_str(definition).unwrap(); parse_derive(ast); } + + #[test] + #[should_panic( + expected = "a grammar file needs to be provided with the #[grammar = \"PATH\"] or #[grammar_inline = \"GRAMMAR CONTENTS\"] attribute" + )] + fn derive_no_grammar() { + let definition = " + #[other_attr] + pub struct MyParser<'a, T>; + "; + let ast = syn::parse_str(definition).unwrap(); + parse_derive(ast); + } }