diff --git a/Cargo.toml b/Cargo.toml index e0a8364..782312f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,6 @@ description = "Grammar framework." [dependencies] indexmap = "1" -indexing = "0.3.2" proc-macro2 = "1.0.6" elsa = "1.3.2" flat-token = "0.0.1" diff --git a/src/forest.rs b/src/forest.rs index 3b9cca2..ca75631 100644 --- a/src/forest.rs +++ b/src/forest.rs @@ -1,11 +1,11 @@ -use crate::high::{type_lambda, ExistsL, PairL}; -use crate::input::{Input, Range}; -use indexing::{self, Container}; +use crate::input::Input; +use crate::RangeExt; use std::cmp::Ordering; use std::collections::{BTreeSet, HashMap, HashSet, VecDeque}; use std::fmt; use std::hash::{Hash, Hasher}; use std::io::{self, Write}; +use std::ops::Range; use std::str; #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] @@ -55,87 +55,84 @@ pub trait GrammarReflector { fn node_desc(&self, kind: Self::NodeKind) -> String; } -pub struct Node<'i, G: GrammarReflector> { +pub struct Node { pub kind: G::NodeKind, - pub range: Range<'i>, + pub range: std::ops::Range, } // FIXME(eddyb) can't derive these on `Node` because that puts bounds on `G`. -impl Copy for Node<'_, G> {} -impl Clone for Node<'_, G> { +impl Clone for Node { fn clone(&self) -> Self { - *self + Self { + kind: self.kind, + range: self.range.clone(), + } } } -impl PartialEq for Node<'_, G> { +impl PartialEq for Node { fn eq(&self, other: &Self) -> bool { - (self.kind, self.range) == (other.kind, other.range) + (self.kind, (self.range.start, self.range.end)) + == (other.kind, (other.range.start, other.range.end)) } } -impl Eq for Node<'_, G> {} -impl PartialOrd for Node<'_, G> + +impl Eq for Node {} +impl PartialOrd for Node where G::NodeKind: PartialOrd, { fn partial_cmp(&self, other: &Self) -> Option { - (self.kind, self.range).partial_cmp(&(other.kind, other.range)) + (self.kind, (self.range.start, self.range.end)) + .partial_cmp(&(other.kind, (other.range.start, other.range.end))) } } -impl Ord for Node<'_, G> +impl Ord for Node where G::NodeKind: Ord, { fn cmp(&self, other: &Self) -> Ordering { - (self.kind, self.range).cmp(&(other.kind, other.range)) + (self.kind, (self.range.start, self.range.end)) + .cmp(&(other.kind, (other.range.start, other.range.end))) } } -impl Hash for Node<'_, G> { +impl Hash for Node { fn hash(&self, state: &mut H) { - (self.kind, self.range).hash(state); + (self.kind, (self.range.start, self.range.end)).hash(state); } } -impl fmt::Debug for Node<'_, G> { +impl fmt::Debug for Node { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "{:?} @ {}..{}", - self.kind, - self.range.start(), - self.range.end() + self.kind, self.range.start, self.range.end ) } } /// A parse forest, in SPPF (Shared Packed Parse Forest) representation. -pub struct ParseForest<'i, G: GrammarReflector, I: Input> { +pub struct ParseForest { pub grammar: G, // HACK(eddyb) `pub(crate)` only for `parser`. - pub(crate) input: Container<'i, I::Container>, - pub(crate) possibilities: HashMap, BTreeSet>, + pub(crate) input: I::Container, + pub(crate) possibilities: HashMap, BTreeSet>, } -type_lambda! { - pub type<'i> ParseForestL = ParseForest<'i, G, I>; - pub type<'i> NodeL = Node<'i, G>; -} - -pub type OwnedParseForestAndNode = ExistsL, NodeL>>; - #[derive(Debug)] pub struct MoreThanOne; -impl<'i, G: GrammarReflector, I: Input> ParseForest<'i, G, I> { - pub fn input(&self, range: Range<'i>) -> &I::Slice { +impl ParseForest { + pub fn input(&self, range: Range) -> &I::Slice { I::slice(&self.input, range) } - pub fn source_info(&self, range: Range<'i>) -> I::SourceInfo { + pub fn source_info(&self, range: Range) -> I::SourceInfo { I::source_info(&self.input, range) } // NOTE(eddyb) this is a private helper and should never be exported. - fn choice_child(&self, node: Node<'i, G>, choice: usize) -> Node<'i, G> { + fn choice_child(&self, node: Node, choice: usize) -> Node { match self.grammar.node_shape(node.kind) { NodeShape::Choice(_) => Node { kind: self.grammar.node_shape_choice_get(node.kind, choice), @@ -148,7 +145,7 @@ impl<'i, G: GrammarReflector, I: Input> ParseForest<'i, G, I> { } } - pub fn one_choice(&self, node: Node<'i, G>) -> Result, MoreThanOne> { + pub fn one_choice(&self, node: Node) -> Result, MoreThanOne> { let choices = &self.possibilities[&node]; if choices.len() > 1 { return Err(MoreThanOne); @@ -157,32 +154,29 @@ impl<'i, G: GrammarReflector, I: Input> ParseForest<'i, G, I> { Ok(self.choice_child(node, choice)) } - pub fn all_choices<'a>( - &'a self, - node: Node<'i, G>, - ) -> impl Iterator> + Clone + 'a + pub fn all_choices<'a>(&'a self, node: Node) -> impl Iterator> + Clone + 'a where G::NodeKind: 'a, { self.possibilities[&node] .iter() .cloned() - .map(move |choice| self.choice_child(node, choice)) + .map(move |choice| self.choice_child(node.clone(), choice)) } // NOTE(eddyb) this is a private helper and should never be exported. - fn split_children(&self, node: Node<'i, G>, split: usize) -> (Node<'i, G>, Node<'i, G>) { + fn split_children(&self, node: Node, split: usize) -> (Node, Node) { match self.grammar.node_shape(node.kind) { NodeShape::Split(left_kind, right_kind) => { - let (left, right, _) = node.range.split_at(split); + let (left, right) = node.range.split_at(split); ( Node { kind: left_kind, - range: Range(left), + range: left, }, Node { kind: right_kind, - range: Range(right), + range: right, }, ) } @@ -193,7 +187,7 @@ impl<'i, G: GrammarReflector, I: Input> ParseForest<'i, G, I> { } } - pub fn one_split(&self, node: Node<'i, G>) -> Result<(Node<'i, G>, Node<'i, G>), MoreThanOne> { + pub fn one_split(&self, node: Node) -> Result<(Node, Node), MoreThanOne> { let splits = &self.possibilities[&node]; if splits.len() > 1 { return Err(MoreThanOne); @@ -204,18 +198,18 @@ impl<'i, G: GrammarReflector, I: Input> ParseForest<'i, G, I> { pub fn all_splits<'a>( &'a self, - node: Node<'i, G>, - ) -> impl Iterator, Node<'i, G>)> + Clone + 'a + node: Node, + ) -> impl Iterator, Node)> + Clone + 'a where G::NodeKind: 'a, { self.possibilities[&node] .iter() .cloned() - .map(move |split| self.split_children(node, split)) + .map(move |split| self.split_children(node.clone(), split)) } - pub fn unpack_alias(&self, node: Node<'i, G>) -> Node<'i, G> { + pub fn unpack_alias(&self, node: Node) -> Node { match self.grammar.node_shape(node.kind) { NodeShape::Alias(inner) => Node { kind: inner, @@ -225,10 +219,10 @@ impl<'i, G: GrammarReflector, I: Input> ParseForest<'i, G, I> { } } - pub fn unpack_opt(&self, node: Node<'i, G>) -> Option> { + pub fn unpack_opt(&self, node: Node) -> Option> { match self.grammar.node_shape(node.kind) { NodeShape::Opt(inner) => { - if node.range.is_empty() { + if !(node.range.start < node.range.end) { None } else { Some(Node { @@ -254,21 +248,21 @@ impl<'i, G: GrammarReflector, I: Input> ParseForest<'i, G, I> { ) }; while let Some(source) = queue.pop_front() { - let source_name = node_name(source); + let source_name = node_name(source.clone()); writeln!(out, " {:?} [shape=box]", source_name)?; - let mut add_children = |children: &[(&str, Node<'i, G>)]| -> io::Result<()> { + let mut add_children = |children: &[(&str, Node)]| -> io::Result<()> { writeln!(out, r#" p{} [label="" shape=point]"#, p)?; writeln!(out, " {:?} -> p{}:n", source_name, p)?; - for &(port, child) in children { + for (port, child) in children { writeln!( out, " p{}:{} -> {:?}:n [dir=none]", p, port, - node_name(child) + node_name(child.clone()) )?; - if seen.insert(child) { - queue.push_back(child); + if seen.insert(child.clone()) { + queue.push_back(child.clone()); } } p += 1; @@ -322,21 +316,15 @@ pub mod typed { type State: Default + AsMut<[usize]>; } - pub trait FromShapeFields<'a, 'i, G: GrammarReflector, I: Input>: Sized { + pub trait FromShapeFields<'a, G: GrammarReflector, I: Input>: Sized { type Output; // FIXME(eddyb) use an array length const here instead when that works. - type Fields: Default + AsMut<[Option>]>; + type Fields: Default + AsMut<[Option>]>; - fn from_shape_fields( - forest: &'a ParseForest<'i, G, I>, - fields: Self::Fields, - ) -> Self::Output; - - fn one( - forest: &'a ParseForest<'i, G, I>, - node: Node<'i, G>, - ) -> Result + fn from_shape_fields(forest: &'a ParseForest, fields: Self::Fields) -> Self::Output; + + fn one(forest: &'a ParseForest, node: Node) -> Result where Self: Shaped, { @@ -344,29 +332,26 @@ pub mod typed { let state = state.as_mut(); assert_eq!(state.len(), Self::Shape::STATE_LEN); - Self::Shape::init(forest, node, state); + Self::Shape::init(forest, node.clone(), state); let mut fields = Self::Fields::default(); - Self::Shape::read(forest, node, state, fields.as_mut()); + Self::Shape::read(forest, node.clone(), state, fields.as_mut()); - if Self::Shape::step(forest, node, state) { + if Self::Shape::step(forest, node.clone(), state) { Err(MoreThanOne) } else { Ok(Self::from_shape_fields(forest, fields)) } } - fn all( - forest: &'a ParseForest<'i, G, I>, - node: Node<'i, G>, - ) -> ShapedAllIter<'a, 'i, G, I, Self> + fn all(forest: &'a ParseForest, node: Node) -> ShapedAllIter<'a, G, I, Self> where Self: Shaped, { let mut state = Self::State::default(); assert_eq!(state.as_mut().len(), Self::Shape::STATE_LEN); - Self::Shape::init(forest, node, state.as_mut()); + Self::Shape::init(forest, node.clone(), state.as_mut()); ShapedAllIter { forest, @@ -376,23 +361,23 @@ pub mod typed { } } - pub struct ShapedAllIter<'a, 'i, G: GrammarReflector, I: Input, T: Shaped> { - forest: &'a ParseForest<'i, G, I>, - node: Node<'i, G>, + pub struct ShapedAllIter<'a, G: GrammarReflector, I: Input, T: Shaped> { + forest: &'a ParseForest, + node: Node, state: Option, } - impl<'a, 'i, G: GrammarReflector, I: Input, T: Shaped> Iterator for ShapedAllIter<'a, 'i, G, I, T> + impl<'a, G: GrammarReflector, I: Input, T: Shaped> Iterator for ShapedAllIter<'a, G, I, T> where - T: FromShapeFields<'a, 'i, G, I>, + T: FromShapeFields<'a, G, I>, { type Item = T::Output; fn next(&mut self) -> Option { let state = self.state.as_mut()?.as_mut(); let mut fields = T::Fields::default(); - T::Shape::read(self.forest, self.node, state, fields.as_mut()); - if !T::Shape::step(self.forest, self.node, state) { + T::Shape::read(self.forest, self.node.clone(), state, fields.as_mut()); + if !T::Shape::step(self.forest, self.node.clone(), state) { self.state.take(); } Some(T::from_shape_fields(self.forest, fields)) @@ -404,22 +389,22 @@ pub mod typed { type State = [usize; ::STATE_LEN]; } - impl<'i, G: GrammarReflector, I: Input> FromShapeFields<'_, 'i, G, I> for () { + impl FromShapeFields<'_, G, I> for () { type Output = (); - type Fields = [Option>; 0]; + type Fields = [Option>; 0]; - fn from_shape_fields(_: &ParseForest<'i, G, I>, []: Self::Fields) {} + fn from_shape_fields(_: &ParseForest, []: Self::Fields) {} } - impl<'a, 'i, G: GrammarReflector, I: Input, T> FromShapeFields<'a, 'i, G, I> for Option + impl<'a, G: GrammarReflector, I: Input, T> FromShapeFields<'a, G, I> for Option where - T: FromShapeFields<'a, 'i, G, I, Fields = [Option>; 1]>, + T: FromShapeFields<'a, G, I, Fields = [Option>; 1]>, { type Output = Option; - type Fields = [Option>; 1]; + type Fields = [Option>; 1]; fn from_shape_fields( - forest: &'a ParseForest<'i, G, I>, + forest: &'a ParseForest, [node]: Self::Fields, ) -> Option { Some(T::from_shape_fields(forest, [Some(node?)])) @@ -433,17 +418,16 @@ pub mod typed { type State = S; } - impl<'a, 'i, G: GrammarReflector, I: Input, T, A, S> FromShapeFields<'a, 'i, G, I> - for WithShape + impl<'a, G: GrammarReflector, I: Input, T, A, S> FromShapeFields<'a, G, I> for WithShape where - T: FromShapeFields<'a, 'i, G, I>, + T: FromShapeFields<'a, G, I>, A: Shape, S: Default + AsMut<[usize]>, { type Output = T::Output; type Fields = T::Fields; - fn from_shape_fields(forest: &'a ParseForest<'i, G, I>, fields: T::Fields) -> T::Output { + fn from_shape_fields(forest: &'a ParseForest, fields: T::Fields) -> T::Output { T::from_shape_fields(forest, fields) } } @@ -451,20 +435,20 @@ pub mod typed { pub trait Shape { const STATE_LEN: usize; - fn init<'i, G: GrammarReflector, I: Input>( - forest: &ParseForest<'i, G, I>, - node: Node<'i, G>, + fn init( + forest: &ParseForest, + node: Node, state: &mut [usize], ); - fn read<'i, G: GrammarReflector, I: Input>( - forest: &ParseForest<'i, G, I>, - node: Node<'i, G>, + fn read( + forest: &ParseForest, + node: Node, state: &[usize], - fields: &mut [Option>], + fields: &mut [Option>], ); - fn step<'i, G: GrammarReflector, I: Input>( - forest: &ParseForest<'i, G, I>, - node: Node<'i, G>, + fn step( + forest: &ParseForest, + node: Node, state: &mut [usize], ) -> bool; } @@ -474,22 +458,18 @@ pub mod typed { impl Shape for Leaf { const STATE_LEN: usize = 0; - fn init<'i, G: GrammarReflector, I: Input>( - _: &ParseForest<'i, G, I>, - _: Node<'i, G>, - _: &mut [usize], - ) { + fn init(_: &ParseForest, _: Node, _: &mut [usize]) { } - fn read<'i, G: GrammarReflector, I: Input>( - _: &ParseForest<'i, G, I>, - _: Node<'i, G>, + fn read( + _: &ParseForest, + _: Node, _: &[usize], - _: &mut [Option>], + _: &mut [Option>], ) { } - fn step<'i, G: GrammarReflector, I: Input>( - _: &ParseForest<'i, G, I>, - _: Node<'i, G>, + fn step( + _: &ParseForest, + _: Node, _: &mut [usize], ) -> bool { false @@ -502,23 +482,19 @@ pub mod typed { impl> Shape for Field { const STATE_LEN: usize = 0; - fn init<'i, G: GrammarReflector, I: Input>( - _: &ParseForest<'i, G, I>, - _: Node<'i, G>, - _: &mut [usize], - ) { + fn init(_: &ParseForest, _: Node, _: &mut [usize]) { } - fn read<'i, G: GrammarReflector, I: Input>( - _: &ParseForest<'i, G, I>, - node: Node<'i, G>, + fn read( + _: &ParseForest, + node: Node, _: &[usize], - fields: &mut [Option>], + fields: &mut [Option>], ) { fields[X::default().as_ref().len()] = Some(node); } - fn step<'i, G: GrammarReflector, I: Input>( - _: &ParseForest<'i, G, I>, - _: Node<'i, G>, + fn step( + _: &ParseForest, + _: Node, _: &mut [usize], ) -> bool { false @@ -530,9 +506,9 @@ pub mod typed { impl Shape for Split { const STATE_LEN: usize = 1 + Left::STATE_LEN + Right::STATE_LEN; - fn init<'i, G: GrammarReflector, I: Input>( - forest: &ParseForest<'i, G, I>, - node: Node<'i, G>, + fn init( + forest: &ParseForest, + node: Node, state: &mut [usize], ) { let (state_split, state) = state.split_at_mut(1); @@ -547,11 +523,11 @@ pub mod typed { Left::init(forest, left, state_left); Right::init(forest, right, state_right); } - fn read<'i, G: GrammarReflector, I: Input>( - forest: &ParseForest<'i, G, I>, - node: Node<'i, G>, + fn read( + forest: &ParseForest, + node: Node, state: &[usize], - fields: &mut [Option>], + fields: &mut [Option>], ) { let (state_split, state) = state.split_at(1); let state_split = state_split[0]; @@ -561,20 +537,20 @@ pub mod typed { Left::read(forest, left, state_left, fields); Right::read(forest, right, state_right, fields); } - fn step<'i, G: GrammarReflector, I: Input>( - forest: &ParseForest<'i, G, I>, - node: Node<'i, G>, + fn step( + forest: &ParseForest, + node: Node, state: &mut [usize], ) -> bool { let (state_split, state) = state.split_at_mut(1); let state_split = &mut state_split[0]; let (state_left, state_right) = state.split_at_mut(Left::STATE_LEN); - let (left, right) = forest.split_children(node, *state_split); + let (left, right) = forest.split_children(node.clone(), *state_split); - Right::step(forest, right, state_right) + Right::step(forest, right.clone(), state_right) || Left::step(forest, left, state_left) && { - Right::init(forest, right, state_right); + Right::init(forest, right.clone(), state_right); true } || ({ @@ -601,9 +577,9 @@ pub mod typed { impl Shape for Choice { const STATE_LEN: usize = At::STATE_LEN + Cases::STATE_LEN; - fn init<'i, G: GrammarReflector, I: Input>( - forest: &ParseForest<'i, G, I>, - node: Node<'i, G>, + fn init( + forest: &ParseForest, + node: Node, state: &mut [usize], ) { let (state_at, state_cases) = state.split_at_mut(At::STATE_LEN); @@ -613,39 +589,39 @@ pub mod typed { let child = forest.choice_child(node, choice); state_cases[0] = choice; - At::init(forest, child, state_at); - Cases::init(forest, child, state_cases); + At::init(forest, child.clone(), state_at); + Cases::init(forest, child.clone(), state_cases); } - fn read<'i, G: GrammarReflector, I: Input>( - forest: &ParseForest<'i, G, I>, - node: Node<'i, G>, + fn read( + forest: &ParseForest, + node: Node, state: &[usize], - fields: &mut [Option>], + fields: &mut [Option>], ) { let (state_at, state_cases) = state.split_at(At::STATE_LEN); let child = forest.choice_child(node, state_cases[0]); - At::read(forest, child, state_at, fields); - Cases::read(forest, child, state_cases, fields); + At::read(forest, child.clone(), state_at, fields); + Cases::read(forest, child.clone(), state_cases, fields); } - fn step<'i, G: GrammarReflector, I: Input>( - forest: &ParseForest<'i, G, I>, - node: Node<'i, G>, + fn step( + forest: &ParseForest, + node: Node, state: &mut [usize], ) -> bool { let (state_at, state_cases) = state.split_at_mut(At::STATE_LEN); - let child = forest.choice_child(node, state_cases[0]); + let child = forest.choice_child(node.clone(), state_cases[0]); - At::step(forest, child, state_at) - || Cases::step(forest, child, state_cases) && { - At::init(forest, child, state_at); + At::step(forest, child.clone(), state_at) + || Cases::step(forest, child.clone(), state_cases) && { + At::init(forest, child.clone(), state_at); true } || ({ use std::ops::Bound::*; - forest.possibilities[&node] + forest.possibilities[&node.clone()] .range((Excluded(state_cases[0]), Unbounded)) .next() .cloned() @@ -655,8 +631,8 @@ pub mod typed { let child = forest.choice_child(node, choice); - At::init(forest, child, state_at); - Cases::init(forest, child, state_cases); + At::init(forest, child.clone(), state_at); + Cases::init(forest, child.clone(), state_cases); }) .is_some() } @@ -682,9 +658,9 @@ pub mod typed { (a_gt_b_mask & a) | (!a_gt_b_mask & b) }; - fn init<'i, G: GrammarReflector, I: Input>( - forest: &ParseForest<'i, G, I>, - node: Node<'i, G>, + fn init( + forest: &ParseForest, + node: Node, state: &mut [usize], ) { let (state_choice, state_last) = state.split_at_mut(1); @@ -696,11 +672,11 @@ pub mod typed { Last::init(forest, node, state_last); } } - fn read<'i, G: GrammarReflector, I: Input>( - forest: &ParseForest<'i, G, I>, - node: Node<'i, G>, + fn read( + forest: &ParseForest, + node: Node, state: &[usize], - fields: &mut [Option>], + fields: &mut [Option>], ) { let (state_choice, state_last) = state.split_at(1); let state_choice = state_choice[0]; @@ -711,9 +687,9 @@ pub mod typed { Last::read(forest, node, state_last, fields); } } - fn step<'i, G: GrammarReflector, I: Input>( - forest: &ParseForest<'i, G, I>, - node: Node<'i, G>, + fn step( + forest: &ParseForest, + node: Node, state: &mut [usize], ) -> bool { let (state_choice, state_last) = state.split_at_mut(1); @@ -736,24 +712,20 @@ pub mod typed { impl Shape for CaseEmpty { const STATE_LEN: usize = 0; - fn init<'i, G: GrammarReflector, I: Input>( - _: &ParseForest<'i, G, I>, - _: Node<'i, G>, - _: &mut [usize], - ) { + fn init(_: &ParseForest, _: Node, _: &mut [usize]) { unreachable!() } - fn read<'i, G: GrammarReflector, I: Input>( - _: &ParseForest<'i, G, I>, - _: Node<'i, G>, + fn read( + _: &ParseForest, + _: Node, _: &[usize], - _: &mut [Option>], + _: &mut [Option>], ) { unreachable!() } - fn step<'i, G: GrammarReflector, I: Input>( - _: &ParseForest<'i, G, I>, - _: Node<'i, G>, + fn step( + _: &ParseForest, + _: Node, _: &mut [usize], ) -> bool { unreachable!() @@ -765,28 +737,28 @@ pub mod typed { impl Shape for Opt { const STATE_LEN: usize = A::STATE_LEN; - fn init<'i, G: GrammarReflector, I: Input>( - forest: &ParseForest<'i, G, I>, - node: Node<'i, G>, + fn init( + forest: &ParseForest, + node: Node, state: &mut [usize], ) { if let Some(child) = forest.unpack_opt(node) { A::init(forest, child, state); } } - fn read<'i, G: GrammarReflector, I: Input>( - forest: &ParseForest<'i, G, I>, - node: Node<'i, G>, + fn read( + forest: &ParseForest, + node: Node, state: &[usize], - fields: &mut [Option>], + fields: &mut [Option>], ) { if let Some(child) = forest.unpack_opt(node) { A::read(forest, child, state, fields); } } - fn step<'i, G: GrammarReflector, I: Input>( - forest: &ParseForest<'i, G, I>, - node: Node<'i, G>, + fn step( + forest: &ParseForest, + node: Node, state: &mut [usize], ) -> bool { match forest.unpack_opt(node) { diff --git a/src/high.rs b/src/high.rs deleted file mode 100644 index d04fc99..0000000 --- a/src/high.rs +++ /dev/null @@ -1,161 +0,0 @@ -//! Utilities for emulating HKTs (over lifetimes) in Rust. - -use std::mem; -use std::ops::{Deref, DerefMut}; - -/// Type lambda application, with a lifetime. -pub trait ApplyL<'a> { - type Out; -} - -/// Type lambda taking a lifetime, i.e. `Lifetime -> Type`. -pub trait LambdaL: for<'a> ApplyL<'a> {} - -impl ApplyL<'a>> LambdaL for T {} - -// HACK(eddyb) work around `macro_rules` not being `use`-able. -pub use crate::__high_type_lambda as type_lambda; - -/// Define a new "type lambda" (over a lifetime). -/// -/// For example, `type_lambda!(type<'a> X = Y>;)` defines -/// a `struct X {...}` that implements `ApplyL`, such that -/// `for<'a> >::Out = Y>` holds. -#[macro_export] -macro_rules! __high_type_lambda { - ($($vis:vis type<$lt:lifetime> $name:ident $(<$($T:ident $(: $bound:path)*),*>)* = $ty:ty;)+) => { - $($vis struct $name $(<$($T $(: $bound)*),*>)* { - _marker: ::std::marker::PhantomData<($($($T),*)*)>, - } - impl<$lt, $($($T $(: $bound)*),*)*> $crate::high::ApplyL<$lt> - for $name $(<$($T),*>)* - { - type Out = $ty; - })+ - } -} - -type_lambda! { - pub type<'a> PairL = - (>::Out, >::Out); -} - -// HACK(eddyb) work around projection limitations with a newtype -// FIXME(#52812) replace with `&'a >::Out` -pub struct RefApplyL<'a, 'b, T: LambdaL>(&'a >::Out); - -impl<'a, T: LambdaL> Deref for RefApplyL<'_, 'a, T> { - type Target = >::Out; - fn deref(&self) -> &Self::Target { - self.0 - } -} - -// HACK(eddyb) work around projection limitations with a newtype -// FIXME(#52812) replace with `&'a mut >::Out` -pub struct RefMutApplyL<'a, 'b, T: LambdaL>(&'a mut >::Out); - -impl<'a, T: LambdaL> Deref for RefMutApplyL<'_, 'a, T> { - type Target = >::Out; - fn deref(&self) -> &Self::Target { - self.0 - } -} - -impl DerefMut for RefMutApplyL<'_, '_, T> { - fn deref_mut(&mut self) -> &mut Self::Target { - self.0 - } -} - -/// Proof token for erasable lifetimes (soundly replaceable with existentials). -/// That is, the lifetime is not used in references that borrow outside -/// data, but rather only self-contained (e.g. `indexing` or `owning_ref`). -#[derive(Copy, Clone)] -pub struct ErasableL<'a> { - _marker: ::std::marker::PhantomData<&'a mut &'a ()>, -} - -impl ErasableL<'_> { - /// Trivial proof that `'static` is erasable (it's always valid). - pub const STATIC: ErasableL<'static> = ErasableL { - _marker: ::std::marker::PhantomData, - }; - - /// Enter an `indexing::scope`, where the closure also receives a proof that - /// the generative lifetime is erasable (it doesn't come from a borrow). - pub fn indexing_scope( - a: A, - f: impl for<'id> FnOnce(ErasableL<'id>, ::indexing::Container<'id, A>) -> R, - ) -> R { - ::indexing::scope(a, |container| { - f( - ErasableL { - _marker: ::std::marker::PhantomData, - }, - container, - ) - }) - } -} - -/// Existential over a lifetime, i.e. `exists 'a.T('a)`. -pub struct ExistsL(>::Out); - -impl ExistsL { - /// Erase the lifetime `'a` from the value's type and wrap it in `ExistsL`. - /// This requires a proof that `'a` is erasable at all (see `ErasableL`). - /// To access the value later, use `unpack`, `unpack_ref` or `unpack_mut`. - pub fn pack<'a>(_: ErasableL<'a>, value: >::Out) -> Self { - let erased = unsafe { mem::transmute_copy(&value) }; - mem::forget(value); - ExistsL(erased) - } - - /// Provide owned access to the value, with the original lifetime replaced - /// by a generative lifetime, so that the closure can't assume equality - /// to any other specific lifetime (thanks to lifetime parametricity). - pub fn unpack( - self, - f: impl for<'a> FnOnce(ErasableL<'a>, >::Out) -> R, - ) -> R { - let skolem = unsafe { mem::transmute_copy(&self.0) }; - mem::forget(self); - f( - ErasableL { - _marker: ::std::marker::PhantomData, - }, - skolem, - ) - } - - /// Provide shared access to the value, with the original lifetime replaced - /// by a generative lifetime, so that the closure can't assume equality - /// to any other specific lifetime (thanks to lifetime parametricity). - pub fn unpack_ref( - &self, - f: impl for<'a, 'b> FnOnce(ErasableL<'b>, RefApplyL<'a, 'b, T>) -> R, - ) -> R { - f( - ErasableL { - _marker: ::std::marker::PhantomData, - }, - RefApplyL(unsafe { &*(&self.0 as *const _ as *const _) }), - ) - } - - /// Provide mutable access to the value, with the original lifetime replaced - /// by a generative lifetime, so that the closure can't assume equality - /// to any other specific lifetime (thanks to lifetime parametricity). - pub fn unpack_mut( - &mut self, - f: impl for<'a, 'b> FnOnce(ErasableL<'b>, RefMutApplyL<'a, 'b, T>) -> R, - ) -> R { - f( - ErasableL { - _marker: ::std::marker::PhantomData, - }, - RefMutApplyL(unsafe { &mut *(&mut self.0 as *mut _ as *mut _) }), - ) - } -} diff --git a/src/indexing_str.rs b/src/indexing_str.rs deleted file mode 100644 index 1cbaf26..0000000 --- a/src/indexing_str.rs +++ /dev/null @@ -1,63 +0,0 @@ -//! String slice support for the `indexing` crate. -// FIXME(eddyb) ensure `indexing::Range` can't break -// `str`'s UTF-8 requirement, without overhead - -use indexing::container_traits::{Contiguous, Trustworthy}; -use indexing::{Container, Range}; -use std::ops::Deref; - -pub struct Str(str); - -impl<'a> From<&'a str> for &'a Str { - fn from(s: &'a str) -> Self { - unsafe { &*(s as *const str as *const Str) } - } -} - -impl Deref for Str { - type Target = str; - fn deref(&self) -> &str { - &self.0 - } -} - -unsafe impl Trustworthy for Str { - type Item = u8; - fn base_len(&self) -> usize { - self.0.len() - } -} - -unsafe impl Contiguous for Str { - fn begin(&self) -> *const Self::Item { - self.0.as_ptr() - } - fn end(&self) -> *const Self::Item { - unsafe { self.begin().add(self.0.len()) } - } - fn as_slice(&self) -> &[Self::Item] { - self.0.as_bytes() - } -} - -impl Str { - pub fn slice<'a, 'b, 'i>(input: &'b Container<'i, &'a Self>, range: Range<'i>) -> &'b Self { - // NOTE(eddyb) following code is copied from `str::is_char_boundary`: - let valid_utf8_start = |bytes: &[u8]| { - match bytes.first() { - None => true, - // This is bit magic equivalent to: b < 128 || b >= 192 - Some(&b) => (b as i8) >= -0x40, - } - }; - - let (_, after) = input.split_around(range); - let (bytes, bytes_after) = (&input[range], &input[after]); - - // HACK(eddyb) ensure the range is still a valid `str` - assert!(valid_utf8_start(bytes)); - assert!(valid_utf8_start(bytes_after)); - - unsafe { &*(bytes as *const [u8] as *const Str) } - } -} diff --git a/src/input.rs b/src/input.rs index a304855..596d9c0 100644 --- a/src/input.rs +++ b/src/input.rs @@ -1,47 +1,7 @@ -use crate::indexing_str; -use indexing::container_traits::Trustworthy; -use indexing::{self, Container, Index, Unknown}; -use std::cmp::Ordering; use std::fmt; -use std::hash::{Hash, Hasher}; -use std::ops::{self, Deref, RangeInclusive}; +use std::ops::{self, Deref, Range, RangeInclusive}; use std::str; -#[derive(Copy, Clone, PartialEq, Eq, Debug)] -pub struct Range<'i>(pub indexing::Range<'i>); - -impl<'i> Deref for Range<'i> { - type Target = indexing::Range<'i>; - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl PartialOrd for Range<'_> { - fn partial_cmp(&self, other: &Self) -> Option { - (self.start(), self.end()).partial_cmp(&(other.start(), other.end())) - } -} - -impl Ord for Range<'_> { - fn cmp(&self, other: &Self) -> Ordering { - (self.start(), self.end()).cmp(&(other.start(), other.end())) - } -} - -impl Hash for Range<'_> { - fn hash(&self, state: &mut H) { - (self.start(), self.end()).hash(state); - } -} - -impl Range<'_> { - pub fn subtract_suffix(self, other: Self) -> Self { - assert_eq!(self.end(), other.end()); - Range(self.split_at(other.start() - self.start()).0) - } -} - #[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)] pub struct LineColumn { pub line: usize, @@ -78,24 +38,16 @@ impl fmt::Debug for LineColumnRange { } pub trait Input: Sized { - type Container: Trustworthy; - type Slice: ?Sized; + type Container: Deref; + type Slice: std::ops::Index, Output = Self::Slice> + ?Sized; type SourceInfo: fmt::Debug; // FIXME(eddyb) remove - replace with `SourceInfo` for the affected range type SourceInfoPoint: fmt::Debug; fn to_container(self) -> Self::Container; - fn slice<'a, 'i>( - input: &'a Container<'i, Self::Container>, - range: Range<'i>, - ) -> &'a Self::Slice; - fn source_info<'i>( - input: &Container<'i, Self::Container>, - range: Range<'i>, - ) -> Self::SourceInfo; - fn source_info_point<'i>( - input: &Container<'i, Self::Container>, - index: Index<'i, Unknown>, - ) -> Self::SourceInfoPoint; + fn slice<'a>(input: &'a Self::Container, range: Range) -> &'a Self::Slice; + fn source_info(input: &Self::Container, range: Range) -> Self::SourceInfo; + fn source_info_point(input: &Self::Container, index: usize) -> Self::SourceInfoPoint; + fn len(input: &Self::Container) -> usize; } impl Input for &[T] { @@ -106,42 +58,33 @@ impl Input for &[T] { fn to_container(self) -> Self::Container { self } - fn slice<'b, 'i>( - input: &'b Container<'i, Self::Container>, - range: Range<'i>, - ) -> &'b Self::Slice { - &input[range.0] + fn slice<'b>(input: &'b Self::Container, range: Range) -> &'b Self::Slice { + &input[range] + } + fn source_info(_: &Self::Container, range: Range) -> Self::SourceInfo { + range } - fn source_info<'i>(_: &Container<'i, Self::Container>, range: Range<'i>) -> Self::SourceInfo { - range.as_range() + fn source_info_point(_: &Self::Container, index: usize) -> Self::SourceInfoPoint { + index } - fn source_info_point<'i>( - _: &Container<'i, Self::Container>, - index: Index<'i, Unknown>, - ) -> Self::SourceInfoPoint { - index.integer() + fn len(input: &Self::Container) -> usize { + input.len() } } impl<'a> Input for &'a str { - type Container = &'a indexing_str::Str; + type Container = &'a str; type Slice = str; type SourceInfo = LineColumnRange; type SourceInfoPoint = LineColumn; fn to_container(self) -> Self::Container { self.into() } - fn slice<'b, 'i>( - input: &'b Container<'i, Self::Container>, - range: Range<'i>, - ) -> &'b Self::Slice { - indexing_str::Str::slice(input, range.0) - } - fn source_info<'i>( - input: &Container<'i, Self::Container>, - range: Range<'i>, - ) -> Self::SourceInfo { - let start = Self::source_info_point(input, range.first()); + fn slice<'b>(input: &'b Self::Container, range: Range) -> &'b Self::Slice { + &input[range] + } + fn source_info(input: &Self::Container, range: Range) -> Self::SourceInfo { + let start = Self::source_info_point(input, range.start); // HACK(eddyb) add up `LineColumn`s to avoid counting twice. // Ideally we'd cache around a line map, like rustc's `SourceMap`. let mut end = LineColumn::count(Self::slice(input, range)); @@ -151,12 +94,11 @@ impl<'a> Input for &'a str { } LineColumnRange { start, end } } - fn source_info_point<'i>( - input: &Container<'i, Self::Container>, - index: Index<'i, Unknown>, - ) -> Self::SourceInfoPoint { - let prefix_range = Range(input.split_at(index).0); - LineColumn::count(Self::slice(input, prefix_range)) + fn source_info_point<'i>(input: &Self::Container, index: usize) -> Self::SourceInfoPoint { + LineColumn::count(&input[..index]) + } + fn len(input: &Self::Container) -> usize { + input.len() } } diff --git a/src/lib.rs b/src/lib.rs index 93fd2d8..73aa28b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,12 +1,6 @@ #![deny(unsafe_code)] #![deny(rust_2018_idioms)] -// NOTE only these two modules can and do contain unsafe code. -#[allow(unsafe_code)] -mod high; -#[allow(unsafe_code)] -mod indexing_str; - #[forbid(unsafe_code)] pub mod context; #[forbid(unsafe_code)] @@ -34,6 +28,29 @@ use indexmap::IndexMap; use std::collections::HashMap; use std::hash::Hash; +use std::ops::Range; + +trait RangeExt { + fn split_at(&self, idx: Idx) -> (Range, Range); + fn join(&self, other: Range) -> Result, Box>; +} + +impl RangeExt for Range +where + Idx: Copy + Eq, +{ + fn split_at(&self, idx: Idx) -> (Range, Range) { + (self.start..idx, idx..self.end) + } + + fn join(&self, other: Range) -> Result, Box> { + if self.end != other.start { + return Err("ranges must be adjacent".into()); + } + Ok(self.start..other.end) + } +} + pub struct Grammar { pub rules: IndexMap, } diff --git a/src/parser.rs b/src/parser.rs index 9201efe..c9046b0 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,18 +1,18 @@ -use crate::forest::{GrammarReflector, Node, OwnedParseForestAndNode, ParseForest}; -use crate::high::ErasableL; -use crate::input::{Input, InputMatch, Range}; -use indexing::{self, Index, Unknown}; +use super::RangeExt; +use crate::forest::{GrammarReflector, Node, ParseForest}; +use crate::input::{Input, InputMatch}; use std::collections::HashMap; +use std::ops::Range; -pub struct Parser<'a, 'i, G: GrammarReflector, I: Input, Pat> { - state: &'a mut ParserState<'i, G, I, Pat>, - result: Range<'i>, - remaining: Range<'i>, +pub struct Parser<'a, G: GrammarReflector, I: Input, Pat> { + state: &'a mut ParserState, + result: Range, + remaining: Range, } -struct ParserState<'i, G: GrammarReflector, I: Input, Pat> { - forest: ParseForest<'i, G, I>, - last_input_pos: Index<'i, Unknown>, +struct ParserState { + forest: ParseForest, + last_input_pos: usize, expected_pats: Vec, } @@ -24,112 +24,113 @@ pub struct ParseError { pub type ParseResult = Result>; -impl<'i, G: GrammarReflector, I: Input, Pat: Ord> Parser<'_, 'i, G, I, Pat> { +impl Parser<'_, G, I, Pat> { pub fn parse_with( grammar: G, input: I, - f: impl for<'i2> FnOnce(Parser<'_, 'i2, G, I, Pat>) -> Option>, - ) -> ParseResult> { - ErasableL::indexing_scope(input.to_container(), |lifetime, input| { - let range = Range(input.range()); - let mut state = ParserState { - forest: ParseForest { - grammar, - input, - possibilities: HashMap::new(), - }, - last_input_pos: range.first(), - expected_pats: vec![], - }; - - let result = f(Parser { - state: &mut state, - result: Range(range.frontiers().0), - remaining: range, - }); - - let mut error = ParseError { - at: I::source_info_point(&state.forest.input, state.last_input_pos), - expected: state.expected_pats, - }; - error.expected.sort(); - error.expected.dedup(); - - match result { - None => Err(error), - Some(node) => { - // The result is only a successful parse if it's as long as the input. - if node.range == range { - Ok(OwnedParseForestAndNode::pack( - lifetime, - (state.forest, node), - )) - } else { - Err(error) - } + f: impl for<'i2> FnOnce(Parser<'_, G, I, Pat>) -> Option>, + ) -> ParseResult, Node)> { + let container: I::Container = input.to_container(); + let range = 0..I::len(&container); + let mut state = ParserState { + forest: ParseForest { + grammar, + input: container, + possibilities: HashMap::new(), + }, + last_input_pos: 0, + expected_pats: vec![], + }; + + let result = f(Parser { + state: &mut state, + result: 0..0, + remaining: range.clone(), + }); + + let mut error = ParseError { + at: I::source_info_point(&state.forest.input, state.last_input_pos), + expected: state.expected_pats, + }; + error.expected.sort(); + error.expected.dedup(); + + match result { + None => Err(error), + Some(node) => { + // The result is only a successful parse if it's as long as the input. + if node.range == range { + Ok((state.forest, node)) + } else { + Err(error) } } - }) + } } // FIXME(eddyb) find an nicer way for algorithms to manipulate these ranges. - pub fn result(&self) -> Range<'i> { - self.result + pub fn result(&self) -> Range { + self.result.clone() } - pub fn remaining(&self) -> Range<'i> { - self.remaining + pub fn remaining(&self) -> Range { + self.remaining.clone() } /// Get the current result range, and leave behind an empty range /// (at the end of the current result / start of the remaining input). - pub fn take_result(&mut self) -> Range<'i> { - let result = self.result; - self.result = Range(result.frontiers().1); + pub fn take_result(&mut self) -> Range { + let result = self.result.clone(); + self.result = result.end..result.end; result } pub fn with_result_and_remaining<'a>( &'a mut self, - result: Range<'i>, - remaining: Range<'i>, - ) -> Parser<'a, 'i, G, I, Pat> { + result: Range, + remaining: Range, + ) -> Parser<'a, G, I, Pat> { // HACK(eddyb) enforce that `result` and `remaining` are inside `self`. - assert_eq!(self.result, Range(self.remaining.frontiers().0)); - let full_new_range = result.join(remaining.0).unwrap(); - assert!(self.remaining.start() <= full_new_range.start()); - assert_eq!(self.remaining.end(), full_new_range.end()); + assert_eq!(self.result, self.remaining.start..self.remaining.start); + let full_new_range = result.join(remaining.clone()).unwrap(); + assert!(self.remaining.start <= full_new_range.start); + assert_eq!(self.remaining.end, full_new_range.end); Parser { state: self.state, result, - remaining, + remaining: remaining.clone(), } } pub fn input_consume_left<'a, SpecificPat: Into>( &'a mut self, pat: SpecificPat, - ) -> Option> + ) -> Option> where I::Slice: InputMatch, { - let start = self.remaining.first(); + let start = self.remaining.start; if start > self.state.last_input_pos { self.state.last_input_pos = start; self.state.expected_pats.clear(); } - match self.state.forest.input(self.remaining).match_left(&pat) { + match self + .state + .forest + .input(self.remaining.clone()) + .match_left(&pat) + { Some(n) => { - let (matching, after, _) = self.remaining.split_at(n); - if after.first() > self.state.last_input_pos { - self.state.last_input_pos = after.first(); + let (matching, after) = self.remaining.split_at(n); + if after.start > self.state.last_input_pos { + self.state.last_input_pos = after.start; self.state.expected_pats.clear(); } Some(Parser { state: self.state, - result: Range(self.result.join(matching).unwrap()), - remaining: Range(after), + result: (self.result.join(matching).unwrap()), + remaining: (after), }) } None => { @@ -144,18 +145,23 @@ impl<'i, G: GrammarReflector, I: Input, Pat: Ord> Parser<'_, 'i, G, I, Pat> { pub fn input_consume_right<'a, SpecificPat>( &'a mut self, pat: SpecificPat, - ) -> Option> + ) -> Option> where I::Slice: InputMatch, { // FIXME(eddyb) implement error reporting support like in `input_consume_left` - match self.state.forest.input(self.remaining).match_right(&pat) { + match self + .state + .forest + .input(self.remaining.clone()) + .match_right(&pat) + { Some(n) => { - let (before, matching, _) = self.remaining.split_at(self.remaining.len() - n); + let (before, matching) = self.remaining.split_at(self.remaining.len() - n); Some(Parser { state: self.state, - result: Range(matching.join(self.result.0).unwrap()), - remaining: Range(before), + result: matching.join(self.result.clone()).unwrap(), + remaining: before, }) } None => None, @@ -169,21 +175,21 @@ impl<'i, G: GrammarReflector, I: Input, Pat: Ord> Parser<'_, 'i, G, I, Pat> { .possibilities .entry(Node { kind, - range: self.result, + range: self.result.clone(), }) .or_default() .insert(choice); } // FIXME(eddyb) safeguard this against misuse. - pub fn forest_add_split(&mut self, kind: G::NodeKind, left: Node<'i, G>) { - self.result = Range(left.range.join(self.result.0).unwrap()); + pub fn forest_add_split(&mut self, kind: G::NodeKind, left: Node) { + self.result = left.range.join(self.result.clone()).unwrap(); self.state .forest .possibilities .entry(Node { kind, - range: self.result, + range: self.result.clone(), }) .or_default() .insert(left.range.len()); diff --git a/src/proc_macro_input.rs b/src/proc_macro_input.rs index ec17534..0026ba1 100644 --- a/src/proc_macro_input.rs +++ b/src/proc_macro_input.rs @@ -1,45 +1,31 @@ -use crate::input::{Input, InputMatch, Range}; +use crate::input::{Input, InputMatch}; use crate::proc_macro::{FlatTokenPat, Span, TokenStream}; use flat_token::{flatten, FlatToken}; -use indexing::{proof::Provable, Container, Index, Unknown}; -use std::ops; +use std::ops::Range; impl Input for TokenStream { type Container = Vec; type Slice = [FlatToken]; - type SourceInfo = ops::Range; + type SourceInfo = Range; type SourceInfoPoint = Span; fn to_container(self) -> Self::Container { let mut out = vec![]; flatten(self, &mut out); out } - fn slice<'b, 'i>( - input: &'b Container<'i, Self::Container>, - range: Range<'i>, - ) -> &'b Self::Slice { - &input[range.0] + fn slice<'b>(input: &'b Self::Container, range: Range) -> &'b Self::Slice { + &input[range] } - fn source_info<'i>( - input: &Container<'i, Self::Container>, - range: Range<'i>, - ) -> Self::SourceInfo { + + fn source_info(input: &Self::Container, range: Range) -> Self::SourceInfo { // FIXME(eddyb) should be joining up spans, but the API // for that is still "semver-exempt" in `proc-macro2`. - let last = range - .nonempty() - .map(|r| r.last().no_proof()) - .unwrap_or(range.past_the_end()); - Self::source_info_point(input, range.first())..Self::source_info_point(input, last) + Self::source_info_point(input, range.start)..Self::source_info_point(input, range.end) } - fn source_info_point<'i>( - input: &Container<'i, Self::Container>, - index: Index<'i, Unknown>, - ) -> Self::SourceInfoPoint { + + fn source_info_point(input: &Self::Container, index: usize) -> Self::SourceInfoPoint { // Try to get as much information as possible. let (before, after) = input.split_at(index); - let before = &input[before]; - let after = &input[after]; if let Some(first) = after.first() { first.span() } else if let Some(last) = before.last() { @@ -51,6 +37,10 @@ impl Input for TokenStream { Span::call_site() } } + + fn len(input: &Self::Container) -> usize { + input.len() + } } impl InputMatch<[FlatTokenPat<&'_ str>]> for [FlatToken] {