Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Subgrammars: prototype design for #25 (re-usable parsers) #181

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions peg-tests/build.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
extern crate peg;

fn main() {
peg::cargo_build("src/test_subgrammar.rustpeg");
peg::cargo_build("src/test_subgrammar_with_args.rustpeg");
peg::cargo_build("src/test_grammar.rustpeg");
}
9 changes: 9 additions & 0 deletions peg-tests/src/test_grammar.rustpeg
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,12 @@ issue152 -> i32 // a
}

pub error_pos = ("a" / "\n" / "\r")*

import test_subgrammar

pub subgrammar_rule = test_subgrammar::only_rule

import test_subgrammar_with_args through { 5 }

pub subgrammar_with_args_rule = test_subgrammar_with_args::repeater

3 changes: 3 additions & 0 deletions peg-tests/src/test_subgrammar.rustpeg
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// The single rule of this grammar. It is marked `shared` so that another
// grammar can reference it as `test_subgrammar::only_rule` after an
// `import test_subgrammar`. It matches one fixed literal and yields `()`.
shared only_rule -> ()
= "this is the only subgrammar rule"

4 changes: 4 additions & 0 deletions peg-tests/src/test_subgrammar_with_args.rustpeg
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#![arguments(my_arg: usize)]

// Shared rule that repeats the literal "a", with the repeat bound taken from
// the grammar argument `my_arg` (declared by the `#![arguments(...)]` line).
// NOTE(review): presumably `*<{my_arg}>` means "exactly my_arg repetitions";
// confirm against the repeat-operator semantics.
shared repeater -> ()
= "a"*<{my_arg}>
20 changes: 20 additions & 0 deletions peg-tests/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ mod test_grammar {
include!(concat!(env!("OUT_DIR"), "/test_grammar.rs"));
}

// Parser module for `test_subgrammar.rustpeg`, included from `OUT_DIR`
// (presumably the file written by `peg::cargo_build` in build.rs — confirm).
// The module must be named `test_subgrammar` and sit next to `test_grammar`:
// code generated for an imported rule calls `super::test_subgrammar::__parse_<rule>`.
mod test_subgrammar {
include!(concat!(env!("OUT_DIR"), "/test_subgrammar.rs"));
}

// Parser module for `test_subgrammar_with_args.rustpeg`, included from `OUT_DIR`
// (presumably the file written by `peg::cargo_build` in build.rs — confirm).
// The module name has to match the name used in the `import` statement of
// `test_grammar.rustpeg`, because the generated code resolves the subgrammar
// through `super::<grammar name>::...`.
mod test_subgrammar_with_args {
include!(concat!(env!("OUT_DIR"), "/test_subgrammar_with_args.rs"));
}

use self::test_grammar::*;

#[test]
Expand Down Expand Up @@ -131,6 +139,16 @@ fn test_renamed_imports() {
assert_eq!(renamed_imports("").unwrap(), (42, 42));
}

/// A rule that simply forwards to `test_subgrammar::only_rule` must accept
/// the literal that the subgrammar rule matches.
#[test]
fn test_subgrammar() {
    let outcome = subgrammar_rule("this is the only subgrammar rule");
    assert!(outcome.is_ok());
}

/// The subgrammar is imported `through { 5 }`, so its `repeater` rule
/// receives 5 as `my_arg`; the input below is five "a" characters.
#[test]
fn test_subgrammar_with_args() {
    let outcome = subgrammar_with_args_rule("aaaaa");
    assert!(outcome.is_ok());
}

#[test]
fn test_neg_lookahead_err() {
let err = neg_lookahead_err("ac").err().unwrap();
Expand Down Expand Up @@ -186,3 +204,5 @@ fn test_error_pos() {
assert_eq!(err.line, 3);
assert_eq!(err.column, 4);
}


2 changes: 1 addition & 1 deletion src/grammar.rs

Large diffs are not rendered by default.

18 changes: 15 additions & 3 deletions src/grammar.rustpeg
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,19 @@ spanned<inner>
pub items -> Vec<Spanned<Item>> = _ items:spanned<item>**_ _ { items }

rule -> Rule
= legacy_exported:exportflag _ cached:cacheflag _ public:pubflag _ name:identifier _ returns:returntype _ "=" _ expression:expression (_ ";")? {
= legacy_exported:exportflag _ cached:cacheflag _ public:pubflag _ is_shared:shareflag _ name:identifier _ returns:returntype _ "=" _ expression:expression (_ ";")? {
Rule{
name: name,
expr: Box::new(expression),
ret_type: returns,
exported: public || legacy_exported,
shared: is_shared,
cached: cached
}
}

pubflag -> bool = PUB { true } / { false }
shareflag -> bool = SHARED { true } / { false }
exportflag -> bool = #quiet<("#[export]" / "#[pub]") {true} / {false}>
cacheflag -> bool = "#[cache]" {true} / {false}

Expand All @@ -30,10 +32,14 @@ template -> Template
Template { name: name, params: params, expr: Box::new(expression) }
}

// Import of another grammar:
//     import <grammar_name>
//     import <grammar_name> through { <rust expression> }
// The optional `through { ... }` part captures a Rust expression as text; it is
// stored in `Subgrammar::converter` (None when the clause is absent).
subgrammar -> Subgrammar
= IMPORT _ grammar_name:identifier converter:( _ THROUGH _ "{" _ code:rust_expr _ "}" { code })? { Subgrammar { name: grammar_name, converter: converter } }

item -> Item
= u:rust_use { Item::Use(u) }
/ r:rule { Item::Rule(r) }
/ t:template { Item::Template(t) }
/ s:subgrammar { Item::Subgrammar(s) }
/ grammar_args

grammar_args -> Item
Expand Down Expand Up @@ -148,9 +154,12 @@ repeatnum -> String
#[cache]
primary -> Spanned<Expr>
= spanned<
name:identifier !(_ ("<" / "->" / "=")) {
name:identifier !(_ ("<" / "->" / "=" / "::")) {
RuleExpr(name)
}
/ grammar_name:identifier "::" rule_name:identifier {
SubgrammarRuleExpr(grammar_name, rule_name)
}
/ name:identifier _ "<" _ args:COMMASEP<expression> _ ">" !( _ "=") {
TemplateInvoke(name, args)
}
Expand Down Expand Up @@ -189,8 +198,11 @@ nonBraceCharacters = [^{}]+
KEYWORD<k> = k !([a-zA-Z0-9_])
USE = KEYWORD<"use">
PUB = KEYWORD<"pub">
SHARED = KEYWORD<"shared">
IMPORT = KEYWORD<"import">
THROUGH = KEYWORD<"through">

keyword = USE / PUB
keyword = USE / PUB / SHARED / IMPORT / THROUGH

integer -> usize
= i:$([0-9]+) { i.parse().unwrap() }
Expand Down
101 changes: 91 additions & 10 deletions src/translate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ fn raw(s: &str) -> TokenStream {

pub(crate) struct Grammar {
pub imports: Vec<String>,
pub subgrammars : Vec<Subgrammar>,
pub rules: Vec<Rule>,
pub templates: HashMap<String, Template>,
pub args: Vec<(String, String)>,
Expand All @@ -19,6 +20,7 @@ pub(crate) struct Grammar {
impl Grammar {
pub fn from_ast(compiler: &mut PegCompiler, items: Vec<Spanned<Item>>) -> Result<Grammar, ()> {
let mut imports = Vec::new();
let mut subgrammars : Vec<Subgrammar> = Vec::new();
let mut rules: Vec<Rule> = Vec::new();
let mut templates = HashMap::new();

Expand Down Expand Up @@ -47,6 +49,17 @@ impl Grammar {
)
}
}
Item::Subgrammar(subgrammar) => {
if subgrammars.iter().any(|imported_grammar| imported_grammar.name == subgrammar.name) {
compiler.span_error(
format!("Subgrammar `{}` imported multiple times", subgrammar.name),
item.span,
Some("duplicate import".to_owned())
)
}

subgrammars.push(subgrammar);
}
Item::GrammarArgs(args) => {
if grammar_args.is_none() {
grammar_args = Some(args);
Expand All @@ -61,7 +74,7 @@ impl Grammar {
}
}

Ok(Grammar{ imports:imports, rules:rules, templates:templates, args: grammar_args.unwrap_or(vec![]) })
Ok(Grammar{ imports:imports, subgrammars:subgrammars, rules:rules, templates:templates, args: grammar_args.unwrap_or(vec![]) })
}

fn find_rule(&self, name: &str) -> Option<&Rule> {
Expand Down Expand Up @@ -90,6 +103,7 @@ pub enum Item {
Use(String),
Rule(Rule),
Template(Template),
Subgrammar(Subgrammar),
GrammarArgs(Vec<(String, String)>)
}

Expand All @@ -98,6 +112,7 @@ pub struct Rule {
pub expr: Box<Spanned<Expr>>,
pub ret_type: String,
pub exported: bool,
pub shared: bool,
pub cached: bool,
}

Expand All @@ -107,6 +122,11 @@ pub struct Template {
pub expr: Box<Spanned<Expr>>,
}

/// An `import <name> [through { <expr> }]` item: a reference to another
/// grammar whose `shared` rules can be invoked as `<name>::<rule>`.
pub struct Subgrammar {
/// Name of the imported grammar. The code generator also uses it as the name
/// of the sibling Rust module (`super::<name>`) that holds the generated parser.
pub name: String,
/// Source text of the Rust expression given in `through { ... }`, if any.
/// When present it is appended as an extra argument to every call into the
/// subgrammar's rule functions.
pub converter: Option<String>,
}

#[derive(Clone)]
pub struct CharSetCase {
pub start: char,
Expand All @@ -125,6 +145,7 @@ pub enum Expr {
LiteralExpr(String,bool),
CharSetExpr(bool, Vec<CharSetCase>),
RuleExpr(String),
SubgrammarRuleExpr(String, String),
SequenceExpr(Vec<Spanned<Expr>>),
ChoiceExpr(Vec<Spanned<Expr>>),
OptionalExpr(Box<Spanned<Expr>>),
Expand Down Expand Up @@ -178,7 +199,7 @@ static HELPERS: &'static str = stringify! {
}

#[derive(Clone)]
enum RuleResult<T> {
pub enum RuleResult<T> {
Matched(usize, T),
Failed,
}
Expand Down Expand Up @@ -326,25 +347,40 @@ fn make_parse_state(rules: &[Rule]) -> TokenStream {
}
}

let cache_field_new = cache_fields.iter();
let cache_field_from = cache_fields.iter();

quote! {
struct ParseState<'input> {
max_err_pos: usize,
suppress_fail: usize,
reparsing_on_error: bool,
expected: ::std::collections::HashSet<&'static str>,
pub(crate) struct ParseState<'input> {
pub max_err_pos: usize,
pub suppress_fail: usize,
pub reparsing_on_error: bool,
pub expected: ::std::collections::HashSet<&'static str>,
_phantom: ::std::marker::PhantomData<&'input ()>,
#(#cache_fields_def),*
}

impl<'input> ParseState<'input> {
pub fn from(max_err_pos: usize, suppress_fail: usize, reparsing_on_error: bool, expected: &::std::collections::HashSet<&'static str>) -> ParseState<'input> {
#![allow(unused)]
ParseState {
max_err_pos: max_err_pos,
suppress_fail: suppress_fail,
reparsing_on_error: reparsing_on_error,
expected: expected.clone(),
_phantom: ::std::marker::PhantomData,
#(#cache_field_from: ::std::collections::HashMap::new()),*
}
}
fn new() -> ParseState<'input> {
#![allow(unused)]
ParseState {
max_err_pos: 0,
suppress_fail: 0,
reparsing_on_error: false,
expected: ::std::collections::HashSet::new(),
_phantom: ::std::marker::PhantomData,
#(#cache_fields: ::std::collections::HashMap::new()),*
#(#cache_field_new: ::std::collections::HashMap::new()),*
}
}
}
Expand Down Expand Up @@ -382,6 +418,11 @@ fn compile_rule(compiler: &mut PegCompiler, grammar: &Grammar, rule: &Rule) -> T

let nl = raw("\n\n"); // make output slightly more readable
let extra_args_def = grammar.extra_args_def();
let shared = if rule.shared {
raw("pub(crate)")
} else {
raw("")
};

if rule.cached {
let cache_field = Ident::new(&format!("{}_cache", rule.name), Span::call_site());
Expand All @@ -399,7 +440,7 @@ fn compile_rule(compiler: &mut PegCompiler, grammar: &Grammar, rule: &Rule) -> T
};

quote! { #nl
fn #name<'input>(__input: &'input str, __state: &mut ParseState<'input>, __pos: usize #extra_args_def) -> RuleResult<#ret_ty> {
#shared fn #name<'input>(__input: &'input str, __state: &mut ParseState<'input>, __pos: usize #extra_args_def) -> RuleResult<#ret_ty> {
#![allow(non_snake_case, unused)]
if let Some(entry) = __state.#cache_field.get(&__pos) {
#cache_trace
Expand All @@ -412,7 +453,7 @@ fn compile_rule(compiler: &mut PegCompiler, grammar: &Grammar, rule: &Rule) -> T
}
} else {
quote! { #nl
fn #name<'input>(__input: &'input str, __state: &mut ParseState<'input>, __pos: usize #extra_args_def) -> RuleResult<#ret_ty> {
#shared fn #name<'input>(__input: &'input str, __state: &mut ParseState<'input>, __pos: usize #extra_args_def) -> RuleResult<#ret_ty> {
#![allow(non_snake_case, unused)]
#wrapped_body
}
Expand Down Expand Up @@ -608,6 +649,46 @@ fn compile_expr(compiler: &mut PegCompiler, cx: Context, e: &Spanned<Expr>) -> T
}
}

// Invocation of a rule that lives in an imported grammar (`grammar::rule`).
//
// The generated code calls `super::<grammar>::__parse_<rule>` with a fresh
// `ParseState` of the subgrammar's own type, seeded from the caller's state,
// and then translates the subgrammar's `RuleResult` into this module's
// `RuleResult`. The translation is required in both branches: every
// generated module defines its own `RuleResult` enum, so the two are
// distinct types even though they have the same shape.
SubgrammarRuleExpr(ref subgrammar_name, ref rule_name) => {
    let imported = cx.grammar.subgrammars.iter().find(|sg| sg.name == *subgrammar_name);

    if let Some(subgrammar) = imported {
        let func = raw(&format!("super::{}::__parse_{}", subgrammar_name, rule_name));
        let subgrammar_module = raw(subgrammar_name);

        // The `through { ... }` expression, if any, becomes a trailing argument.
        let converter = subgrammar.converter.as_ref()
            .map(|code| raw(&format!(", {}", code)))
            .unwrap_or_else(|| raw(""));

        let subgrammar_path = quote!{ super::#subgrammar_module };

        // NOTE(review): the subgrammar state is a temporary that is dropped
        // after the call, so whatever the subgrammar records in
        // `max_err_pos` / `expected` is not copied back into `__state`.
        // Error positions inside imported rules may therefore be lost.
        let state_conversion =
            quote! {
                &mut #subgrammar_path::ParseState::from(
                    __state.max_err_pos,
                    __state.suppress_fail,
                    __state.reparsing_on_error,
                    &__state.expected
                )
            };

        let call = quote!{ #func(__input, #state_conversion, __pos #converter) };

        if cx.result_used {
            // Re-wrap the value in this module's `RuleResult`; returning the
            // subgrammar's result directly would not type-check.
            quote!{
                match #call {
                    #subgrammar_path::RuleResult::Matched(pos, value) => Matched(pos, value),
                    #subgrammar_path::RuleResult::Failed => Failed,
                }
            }
        } else {
            quote!{
                match #call {
                    #subgrammar_path::RuleResult::Matched(pos, _) => Matched(pos, ()),
                    #subgrammar_path::RuleResult::Failed => Failed,
                }
            }
        }
    } else {
        compiler.span_error(
            format!("No subgrammar named `{}` found", subgrammar_name),
            e.span,
            Some("subgrammar not found".to_owned())
        );
        quote!()
    }
}

TemplateInvoke(ref name, ref params) => {
let template = match cx.grammar.templates.get(&name[..]) {
Some(x) => x,
Expand Down