Skip to content

Commit

Permalink
Introduce the slow_bruteforce_interpreter.
Browse files Browse the repository at this point in the history
  • Loading branch information
eddyb committed Jul 23, 2019
1 parent 7e572f3 commit 313de37
Show file tree
Hide file tree
Showing 11 changed files with 1,333 additions and 130 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@ indexmap = "1"
indexing = "0.3.2"
proc-macro2 = "0.4.30"
elsa = "1.3.2"
cyclotron = "0.0.1"

[lib]
doctest = false
test = false

[patch.'crates-io']
cyclotron = { git = "https://github.com/LykenSol/cyclotron", rev = "8179a9ee6ddd313f69a4dc50a49a6e92c44c1a54" }
155 changes: 142 additions & 13 deletions src/forest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ use std::collections::{BTreeSet, HashMap, VecDeque};
use std::fmt;
use std::hash::Hash;
use std::io::{self, Write};
use std::iter;
use std::rc::Rc;
use std::str;

#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
Expand Down Expand Up @@ -51,6 +53,10 @@ pub trait GrammarReflector {

fn node_shape(&self, kind: Self::NodeKind) -> NodeShape<Self::NodeKind>;
fn node_desc(&self, kind: Self::NodeKind) -> String;

/// Looks up a node kind from a choice node kind (`_kind`) and an index (`_i`).
///
/// NOTE(review): presumably this returns the kind of the `_i`-th alternative
/// of the choice `_kind` — the only caller visible here
/// (`DynExpandedTree::get`) compares the result against the kind of the
/// alternative that was actually selected. Confirm against implementors.
///
/// The default implementation ignores both arguments and always returns
/// `None`; note that `DynExpandedTree::get` unwraps the result, so
/// reflectors used with it need to override this method.
fn choice_by_index(&self, _kind: Self::NodeKind, _i: usize) -> Option<Self::NodeKind> {
None
}
}

#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
Expand All @@ -72,25 +78,25 @@ impl<P: fmt::Debug> fmt::Debug for Node<'_, P> {
}

/// A parse forest, in SPPF (Shared Packed Parse Forest) representation.
pub struct ParseForest<'i, G: GrammarReflector, I: Input> {
pub struct ParseForest<'i, P, G, I: Input> {
pub grammar: G,
// HACK(eddyb) `pub(crate)` only for `parser`.
pub(crate) input: Container<'i, I::Container>,
pub(crate) possible_choices: HashMap<Node<'i, G::NodeKind>, BTreeSet<G::NodeKind>>,
pub(crate) possible_splits: HashMap<Node<'i, G::NodeKind>, BTreeSet<usize>>,
pub(crate) possible_choices: HashMap<Node<'i, P>, BTreeSet<P>>,
pub(crate) possible_splits: HashMap<Node<'i, P>, BTreeSet<usize>>,
}

type_lambda! {
pub type<'i> ParseForestL<G: GrammarReflector, I: Input> = ParseForest<'i, G, I>;
pub type<'i> ParseForestL<P, G, I: Input> = ParseForest<'i, P, G, I>;
pub type<'i> NodeL<P> = Node<'i, P>;
}

pub type OwnedParseForestAndNode<G, P, I> = ExistsL<PairL<ParseForestL<G, I>, NodeL<P>>>;
pub type OwnedParseForestAndNode<G, P, I> = ExistsL<PairL<ParseForestL<P, G, I>, NodeL<P>>>;

/// Error type of the "exactly one" forest queries.
///
/// `DynExpandedTree::one_from_node` returns it, propagating it from
/// `ParseForest::one_choice` and `ParseForest::one_split`.
/// NOTE(review): presumably it signals that the forest is ambiguous at the
/// queried node (more than one choice/split is possible) — confirm against
/// those methods.
#[derive(Debug)]
pub struct MoreThanOne;

impl<'i, P, G, I: Input> ParseForest<'i, G, I>
impl<'i, P, G, I: Input> ParseForest<'i, P, G, I>
where
// FIXME(eddyb) these shouldn't be needed, as they are bounds on
// `GrammarReflector::NodeKind`, but that's ignored currently.
Expand Down Expand Up @@ -225,14 +231,17 @@ where
}
}

pub fn dump_graphviz(&self, out: &mut dyn Write) -> io::Result<()> {
pub fn dump_graphviz(&self, root: Option<Node<'i, P>>, out: &mut dyn Write) -> io::Result<()> {
writeln!(out, "digraph forest {{")?;
let mut queue: VecDeque<_> = self
.possible_choices
.keys()
.chain(self.possible_splits.keys())
.cloned()
.collect();
let mut queue: VecDeque<_> = match root {
Some(root) => iter::once(root).collect(),
None => self
.possible_choices
.keys()
.chain(self.possible_splits.keys())
.cloned()
.collect(),
};
let mut seen: BTreeSet<_> = queue.iter().cloned().collect();
let mut p = 0;
let node_name = |Node { kind, range }| {
Expand Down Expand Up @@ -293,6 +302,126 @@ where
}
}

/// Inefficient expansion of a forest, for use when shapes are not statically known.
///
/// Each value pairs a forest node with one concrete expansion of it; child
/// trees are held behind `Rc` so they can be shared between expansions.
// TODO(eddyb) cache these `Rc`s, or maybe even use something better?
#[derive(Clone, Debug)]
pub struct DynExpandedTree<'i, P> {
/// The forest node this tree was expanded from.
pub node: Node<'i, P>,
/// How `node` was expanded (which alternative / which children).
pub kind: DynExpandedTreeKind<'i, P>,
}

/// The expansion of a single node inside a `DynExpandedTree`.
///
/// The variant is determined by the `NodeShape` the grammar reports for the
/// node's kind (see `DynExpandedTree::one_from_node` / `all_from_node`).
#[derive(Clone, Debug)]
pub enum DynExpandedTreeKind<'i, P> {
/// No children; built for `NodeShape::Opaque` and `NodeShape::Alias(_)`.
Leaf,
/// Built for `NodeShape::Choice`: the kind of the selected alternative,
/// together with that alternative's expanded tree.
Or(P, Rc<DynExpandedTree<'i, P>>),
/// Built for `NodeShape::Opt(_)`: the expanded child if the forest
/// unpacked one, `None` otherwise.
Opt(Option<Rc<DynExpandedTree<'i, P>>>),
/// Built for `NodeShape::Split(..)`: the expanded left and right children,
/// in that order.
Concat([Rc<DynExpandedTree<'i, P>>; 2]),
}

impl<'i, P> DynExpandedTree<'i, P>
// FIXME(eddyb) these shouldn't be needed, as they are bounds on
// `GrammarReflector::NodeKind`, but that's ignored currently.
where
    P: fmt::Debug + Ord + Hash + Copy,
{
    /// Expands `node` into a single tree, following the one choice / split
    /// the forest offers at every step.
    ///
    /// The expansion is driven by `forest.grammar.node_shape(node.kind)`:
    /// opaque and alias nodes become leaves, everything else recurses into
    /// its children.
    ///
    /// # Errors
    ///
    /// Propagates `MoreThanOne` from `ParseForest::one_choice` or
    /// `ParseForest::one_split`, at this node or at any descendant.
    pub fn one_from_node<G, I>(
        forest: &ParseForest<'i, P, G, I>,
        node: Node<'i, P>,
    ) -> Result<Rc<Self>, MoreThanOne>
    where
        G: GrammarReflector<NodeKind = P>,
        I: Input,
    {
        let kind = match forest.grammar.node_shape(node.kind) {
            NodeShape::Opaque | NodeShape::Alias(_) => DynExpandedTreeKind::Leaf,
            NodeShape::Choice => {
                let child = forest.one_choice(node)?;
                DynExpandedTreeKind::Or(child.kind, Self::one_from_node(forest, child)?)
            }
            NodeShape::Opt(_) => DynExpandedTreeKind::Opt(
                forest
                    .unpack_opt(node)
                    .map(|child| Self::one_from_node(forest, child))
                    // `Option<Result<_, _>>` -> `Result<Option<_>, _>`, so that
                    // an error in the child aborts the whole expansion.
                    .transpose()?,
            ),
            NodeShape::Split(..) => {
                let (left, right) = forest.one_split(node)?;
                DynExpandedTreeKind::Concat([
                    Self::one_from_node(forest, left)?,
                    Self::one_from_node(forest, right)?,
                ])
            }
        };
        Ok(Rc::new(DynExpandedTree { node, kind }))
    }

    /// Expands `node` into every tree the forest admits for it.
    ///
    /// Choices contribute the trees of each alternative, and splits
    /// contribute every (left tree, right tree) combination of each split,
    /// so the result can grow multiplicatively with the amount of ambiguity.
    /// Trees are returned in the iteration order of
    /// `ParseForest::all_choices` / `ParseForest::all_splits`, with left
    /// trees varying slower than right trees.
    pub fn all_from_node<G, I>(
        forest: &ParseForest<'i, P, G, I>,
        node: Node<'i, P>,
    ) -> Vec<Rc<Self>>
    where
        G: GrammarReflector<NodeKind = P>,
        I: Input,
    {
        // Wraps an expansion of `node` into a shared tree.
        let new = |kind| Rc::new(DynExpandedTree { node, kind });
        match forest.grammar.node_shape(node.kind) {
            NodeShape::Opaque | NodeShape::Alias(_) => vec![new(DynExpandedTreeKind::Leaf)],
            NodeShape::Choice => forest
                .all_choices(node)
                .flat_map(|child| {
                    Self::all_from_node(forest, child)
                        .into_iter()
                        .map(move |child_tree| new(DynExpandedTreeKind::Or(child.kind, child_tree)))
                })
                .collect(),
            NodeShape::Opt(_) => match forest.unpack_opt(node) {
                Some(child) => Self::all_from_node(forest, child)
                    .into_iter()
                    .map(|child_tree| new(DynExpandedTreeKind::Opt(Some(child_tree))))
                    .collect(),
                None => vec![new(DynExpandedTreeKind::Opt(None))],
            },
            NodeShape::Split(..) => {
                let mut trees = Vec::new();
                for (left, right) in forest.all_splits(node) {
                    let left_trees = Self::all_from_node(forest, left);
                    if left_trees.is_empty() {
                        // Nothing to combine with; don't expand `right` at all.
                        continue;
                    }
                    // Expand the right child once per split rather than once
                    // per left tree: the recursive expansion does not depend
                    // on the left tree and is the expensive part.
                    let right_trees = Self::all_from_node(forest, right);
                    for left_tree in &left_trees {
                        trees.extend(right_trees.iter().map(|right_tree| {
                            new(DynExpandedTreeKind::Concat([
                                Rc::clone(left_tree),
                                Rc::clone(right_tree),
                            ]))
                        }));
                    }
                }
                trees
            }
        }
    }

    /// Returns the `i`-th child of this tree, if it is present.
    ///
    /// * `Or`: `Some(child)` only if the grammar's `i`-th choice for this
    ///   node's kind is the alternative that was selected, `None` otherwise.
    /// * `Opt`: the optional child (only index `0` is valid).
    /// * `Concat`: the left (`0`) or right (`1`) child.
    ///
    /// # Panics
    ///
    /// Panics if this tree is a `Leaf`, if `choice_by_index` returns `None`
    /// for an `Or` tree, if `i != 0` for an `Opt` tree, or if `i >= 2` for a
    /// `Concat` tree.
    pub fn get<G, I>(&self, forest: &ParseForest<'i, P, G, I>, i: usize) -> Option<Rc<Self>>
    where
        G: GrammarReflector<NodeKind = P>,
        I: Input,
    {
        match &self.kind {
            DynExpandedTreeKind::Leaf => {
                unreachable!("`DynExpandedTree::get` called on a leaf, which has no children")
            }
            DynExpandedTreeKind::Or(child, child_tree) => {
                let indexed = forest
                    .grammar
                    .choice_by_index(self.node.kind, i)
                    .expect("`choice_by_index` returned `None` for this node kind and index");
                if indexed == *child {
                    Some(Rc::clone(child_tree))
                } else {
                    None
                }
            }
            DynExpandedTreeKind::Opt(child) => {
                assert_eq!(i, 0);
                child.clone()
            }
            DynExpandedTreeKind::Concat(children) => Some(Rc::clone(&children[i])),
        }
    }
}

// FIXME(rust-lang/rust#54175) work around iterator adapter compile-time
// blowup issues by using a makeshift "non-determinism arrow toolkit".
pub mod nd {
Expand Down
102 changes: 4 additions & 98 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,17 @@ pub mod forest;
#[forbid(unsafe_code)]
pub mod input;
#[forbid(unsafe_code)]
pub mod lyg;
#[forbid(unsafe_code)]
pub mod parser;
#[forbid(unsafe_code)]
pub mod proc_macro;
#[forbid(unsafe_code)]
pub mod rule;
#[forbid(unsafe_code)]
pub mod scannerless;
#[forbid(unsafe_code)]
pub mod slow_bruteforce_interpreter;

// HACK(eddyb) this contains impls for types in `proc_macro`, which depend on
// `input`; collapse this back into `proc_macro`.
Expand Down Expand Up @@ -78,101 +82,3 @@ impl Grammar {
}
}
}

/// Construct a (meta-)grammar for parsing a grammar.
///
/// The grammar is assembled in two parts via the local `grammar!` macro: the
/// main rules (`Grammar`, `RuleDef`, `Or`, ...) and a lexical fragment
/// (`FileStart`, `FileEnd`, `Ident`, `StrLit`, `CharLit`), which is merged
/// into the former with `Grammar::extend` before returning.
///
/// Rule names are interned through `cx`, and every rule body is finalized
/// with `.finish(cx)`.
pub fn grammar_grammar<Pat: Eq + Hash + From<&'static str>>(cx: &Context<Pat>) -> Grammar {
use crate::rule::*;

// HACK(eddyb) more explicit subset of the grammar, for bootstrapping.
// Translates one token tree of the bootstrap notation into rule-builder
// calls. `macro_rules!` tries the arms top to bottom, so the order of the
// arms below matters.
macro_rules! rule {
// `{ a ..= b }`: eat an inclusive range.
({ $start:tt ..= $end:tt }) => {
eat($start..=$end)
};
// `{ ! pat }`: negative lookahead on a pattern.
({ ! $pat:tt }) => {
negative_lookahead($pat)
};
// `{ ! a ..= b }`: negative lookahead on an inclusive range.
({ ! $start:tt ..= $end:tt }) => {
negative_lookahead($start..=$end)
};
// Bare identifier: call the rule with that name.
($rule:ident) => {
call(stringify!($rule))
};
// `{ name : rule }`: attach a field name to the rule.
({ $name:ident : $rule:tt }) => {
rule!($rule).field(stringify!($name))
};
// `{ rule ? }`: optional.
({ $rule:tt ? }) => {
rule!($rule).opt()
};
// `{ elem * }`: `repeat_many`.
({ $elem:tt * }) => {
rule!($elem).repeat_many()
};
// `{ elem + }`: `repeat_more`.
({ $elem:tt + }) => {
rule!($elem).repeat_more()
};
// `{ elem + % sep }`: `repeat_more_sep` with a simple separator.
({ $elem:tt + % $sep:tt }) => {
rule!($elem).repeat_more_sep(rule!($sep), SepKind::Simple)
};
// `{ a | b | ... }`: alternation, combined with `|`.
({ $rule0:tt $(| $rule:tt)+ }) => {
rule!($rule0) $(| rule!($rule))+
};
// `{ a b ... }`: sequence, combined with `+`.
({ $rule0:tt $($rule:tt)* }) => {
rule!($rule0) $(+ rule!($rule))*
};
// Anything else is an expression used as a pattern to eat.
($pat:expr) => {
eat($pat)
};
}

// Builds a `Grammar` from a list of `Name = alt | alt | ...;` definitions,
// expanding each body with `rule!` (the alternatives are wrapped in one
// `{ ... }` group first).
macro_rules! grammar {
($($rule_name:ident = $($rule:tt)|+;)*) => ({
let mut grammar = Grammar::new();
$(grammar.define(
cx.intern(stringify!($rule_name)),
rule!({ $($rule)|+ }).finish(cx),
);)*
grammar
})
}

// Main grammar.
let mut grammar = grammar! {
Grammar = { FileStart {rules:{RuleDef*}} FileEnd };
RuleDef = { {name:Ident} "=" {rule:Or} ";" };
Or = {{"|"?} {rules:{Concat+ % "|"}}};
Concat = {rules:{Rule+}};
Rule = { {{ {field:Ident} ":" }?} {rule:Primary} {{modifier:Modifier}?} };
Primary =
{Eat:Pattern} |
{Call:Ident} |
{Group:{ "{" {{or:Or}?} "}" }};
Modifier =
{Opt:"?"} |
{Repeat:{ {repeat:Repeat} {{ {kind:SepKind} {sep:Primary} }?} }};
Repeat =
{Many:"*"} |
{More:"+"};
SepKind =
{Simple:"%"} |
// HACK(eddyb) should be "%%", but `rustc`'s `proc_macro` server doesn't
// always preserve jointness, except within multi-character Rust operators.
{Trailing:{"%" "%"}};
Pattern =
{Str:StrLit} |
{CharRange:{ {{start:CharLit}?} ".." {{end:CharLit}?} }} |
{CharRangeInclusive:{ {{start:CharLit}?} "..=" {end:CharLit} }};
};

// Lexical fragment of the grammar.
grammar.extend(grammar! {
FileStart = "";
FileEnd = "";

Ident = IDENT;

// FIXME(eddyb) restrict literals, once `proc_macro` allows it.
StrLit = LITERAL;
CharLit = LITERAL;
});

grammar
}
Loading

0 comments on commit 313de37

Please sign in to comment.