From 6116e41806e7fce57e1cc078c19d117c3e0b5075 Mon Sep 17 00:00:00 2001
From: jyn
Date: Fri, 18 Oct 2024 10:21:39 -0400
Subject: [PATCH] Add [`GreenNodeBuilder::revert`] to support backtracking parsers

Rowan, and hence CSTree, is designed around hand-written parsers. In
particular, the APIs for *building* trees require that each token is
recorded only once. Some parsers, and especially parser combinators, use
backtracking instead, where the same token may be seen multiple times.
To support this, add a new `revert` function which discards all tokens
seen since the given checkpoint.
---
 cstree/src/green/builder.rs |  50 +++++++++++++++
 cstree/tests/it/main.rs     |  35 ++++++++++-
 cstree/tests/it/rollback.rs | 117 ++++++++++++++++++++++++++++++++++++
 3 files changed, 201 insertions(+), 1 deletion(-)
 create mode 100644 cstree/tests/it/rollback.rs

diff --git a/cstree/src/green/builder.rs b/cstree/src/green/builder.rs
index cf4e967..8fb2c67 100644
--- a/cstree/src/green/builder.rs
+++ b/cstree/src/green/builder.rs
@@ -472,6 +472,56 @@ where
         Checkpoint(self.children.len())
     }
 
+    /// Delete all tokens parsed since the [`Checkpoint`] was created.
+    ///
+    /// This is useful for backtracking parsers.
+    ///
+    /// NOTE: this does *not* delete any unfinished nodes; you are responsible for pairing `revert` only
+    /// with `checkpoint`/`start_node_at`. Using `start_node` combined with `revert` has unspecified behavior.
+    ///
+    /// NOTE: checkpoints can only be nested "forwards", not backwards. Reverting to an earlier checkpoint and then
+    /// to a later one is unspecified (it will usually panic).
+    ///
+    /// # Examples
+    /// ```rust
+    /// # use cstree::testing::*;
+    /// # use cstree::build::GreenNodeBuilder;
+    /// # struct Parser;
+    /// # impl Parser {
+    /// #     fn peek(&self) -> Option<TestSyntaxKind> { None }
+    /// #     fn parse_expr(&mut self) {}
+    /// # }
+    /// # let mut builder: GreenNodeBuilder<MySyntax> = GreenNodeBuilder::new();
+    /// # let mut parser = Parser;
+    /// let checkpoint = builder.checkpoint();
+    /// parser.parse_expr();
+    /// if let Some(Plus) = parser.peek() {
+    ///     // 1 + 2 = Add(1, 2)
+    ///     builder.start_node_at(checkpoint, Operation);
+    ///     parser.parse_expr();
+    ///     builder.finish_node();
+    /// } else {
+    ///     builder.revert(checkpoint);
+    /// }
+    /// ```
+    pub fn revert(&mut self, checkpoint: Checkpoint) {
+        let Checkpoint(checkpoint) = checkpoint;
+        // A checkpoint past the end of `children` is stale. This cannot detect children that were
+        // re-added after an earlier revert, so such a checkpoint may still be accepted.
+        assert!(
+            checkpoint <= self.children.len(),
+            "cannot rollback to a checkpoint in the future"
+        );
+        if let Some(&(_, first_child)) = self.parents.last() {
+            assert!(
+                checkpoint >= first_child,
+                "checkpoint no longer valid, was an unmatched start_node_at called?"
+            );
+        }
+        // Both bounds hold: discard every child recorded after the checkpoint.
+        self.children.truncate(checkpoint);
+    }
+
     /// Wrap the previous branch marked by [`checkpoint`](GreenNodeBuilder::checkpoint) in a new
     /// branch and make it current.
#[inline]
diff --git a/cstree/tests/it/main.rs b/cstree/tests/it/main.rs
index a952a23..024bc73 100644
--- a/cstree/tests/it/main.rs
+++ b/cstree/tests/it/main.rs
@@ -1,5 +1,6 @@
 mod basic;
 mod regressions;
+mod rollback;
 mod sendsync;
 #[cfg(feature = "serialize")]
 mod serde;
@@ -7,7 +8,8 @@ mod serde;
 use cstree::{
     build::{GreenNodeBuilder, NodeCache},
     green::GreenNode,
-    interning::Interner,
+    interning::{Interner, Resolver},
+    util::NodeOrToken,
     RawSyntaxKind, Syntax,
 };
 
@@ -78,3 +80,34 @@ where
     }
     from
 }
+
+/// Panics unless `left` and `right` have the same kinds, the same shape and the same resolved token text.
+///
+/// Each tree is read through its own resolver. The green trees are deliberately not compared directly:
+/// token text is only meaningful via a resolver, and the two trees may have been built with different ones.
+#[track_caller]
+pub fn assert_tree_eq(
+    (left, left_res): (&SyntaxNode, &impl Resolver),
+    (right, right_res): (&SyntaxNode, &impl Resolver),
+) {
+    if left.kind() != right.kind() || left.children_with_tokens().len() != right.children_with_tokens().len() {
+        panic!("{} !=\n{}", left.debug(left_res, true), right.debug(right_res, true))
+    }
+
+    for elem in left.children_with_tokens().zip(right.children_with_tokens()) {
+        match elem {
+            (NodeOrToken::Node(ln), NodeOrToken::Node(rn)) => assert_tree_eq((ln, left_res), (rn, right_res)),
+            (NodeOrToken::Node(n), NodeOrToken::Token(t)) => {
+                panic!("{} != {}", n.debug(left_res, true), t.debug(right_res))
+            }
+            (NodeOrToken::Token(t), NodeOrToken::Node(n)) => {
+                panic!("{} != {}", t.debug(left_res), n.debug(right_res, true))
+            }
+            (NodeOrToken::Token(lt), NodeOrToken::Token(rt)) => {
+                if lt.syntax_kind() != rt.syntax_kind() || lt.resolve_text(left_res) != rt.resolve_text(right_res) {
+                    panic!("{} != {}", lt.debug(left_res), rt.debug(right_res))
+                }
+            }
+        }
+    }
+}
diff --git a/cstree/tests/it/rollback.rs b/cstree/tests/it/rollback.rs
new file mode 100644
index 0000000..e51c94b
--- /dev/null
+++ b/cstree/tests/it/rollback.rs
@@ -0,0 +1,117 @@
+use super::*;
+use cstree::interning::Resolver;
+
+type GreenNodeBuilder<'cache, 'interner> = cstree::build::GreenNodeBuilder<'cache, 'interner, SyntaxKind>;
+
+fn with_builder(f: impl FnOnce(&mut GreenNodeBuilder)) -> 
(SyntaxNode, impl Resolver) {
+    let mut builder = GreenNodeBuilder::new();
+    f(&mut builder);
+    let (node, cache) = builder.finish();
+    (SyntaxNode::new_root(node), cache.unwrap().into_interner().unwrap())
+}
+
+#[test]
+#[should_panic = "!="] // every mismatch panic raised by `assert_tree_eq` contains "!="
+fn comparison_works() {
+    let [(first, res1), (second, res2)] = [0, 1].map(|kind| { // two valid roots that differ only in kind
+        with_builder(move |builder| {
+            builder.start_node(SyntaxKind(kind));
+            builder.finish_node();
+        })
+    });
+    assert_tree_eq((&first, &res1), (&second, &res2));
+}
+
+#[test]
+fn simple() {
+    let (first, res1) = with_builder(|builder| {
+        builder.start_node(SyntaxKind(0));
+        builder.finish_node();
+    });
+    let (second, res2) = with_builder(|builder| {
+        builder.start_node(SyntaxKind(0));
+
+        // Add a token, then remove it.
+        let initial = builder.checkpoint();
+        builder.token(SyntaxKind(1), "hi");
+        builder.revert(initial);
+
+        builder.finish_node();
+    });
+    assert_tree_eq((&first, &res1), (&second, &res2));
+}
+
+#[test]
+fn nested() {
+    let (first, res1) = with_builder(|builder| {
+        builder.start_node(SyntaxKind(0));
+        builder.finish_node();
+    });
+
+    let (second, res2) = with_builder(|builder| {
+        builder.start_node(SyntaxKind(0));
+        // Add two tokens, then remove both.
+        let initial = builder.checkpoint();
+        builder.token(SyntaxKind(1), "hi");
+        builder.token(SyntaxKind(2), "hello");
+        builder.revert(initial);
+
+        builder.finish_node();
+    });
+
+    let (third, res3) = with_builder(|builder| {
+        builder.start_node(SyntaxKind(0));
+        builder.finish_node();
+    });
+
+    assert_tree_eq((&first, &res1), (&second, &res2));
+    assert_tree_eq((&first, &res1), (&third, &res3));
+}
+
+#[test]
+#[should_panic = "checkpoint in the future"]
+fn misuse() {
+    with_builder(|builder| {
+        builder.start_node(SyntaxKind(0));
+
+        // Add two tokens, but remove them in the wrong order.
+        let initial = builder.checkpoint();
+        builder.token(SyntaxKind(1), "hi");
+        let new = builder.checkpoint();
+        builder.token(SyntaxKind(2), "hello");
+        builder.revert(initial);
+        builder.revert(new); // `new` now points past the end of the children, so this panics
+
+        builder.finish_node();
+    });
+}
+
+#[test]
+fn misuse2() {
+    let (first, res1) = with_builder(|builder| {
+        builder.start_node(SyntaxKind(0));
+        builder.token(SyntaxKind(3), "no");
+        builder.finish_node();
+    });
+
+    let (second, res2) = with_builder(|builder| {
+        builder.start_node(SyntaxKind(0));
+
+        // Add two tokens, revert to the initial state, add three tokens, then revert to the now-stale `new` checkpoint.
+        let initial = builder.checkpoint();
+        builder.token(SyntaxKind(1), "hi");
+        let new = builder.checkpoint();
+        builder.token(SyntaxKind(2), "hello");
+        builder.revert(initial);
+
+        // This is wrong, but there's not a whole lot the library can do about it: `new` is within bounds again.
+        builder.token(SyntaxKind(3), "no");
+        builder.token(SyntaxKind(4), "bad");
+        builder.token(SyntaxKind(4), "wrong");
+        builder.revert(new); // silently keeps only the first re-added token ("no")
+
+        builder.finish_node();
+    });
+
+    assert_tree_eq((&first, &res1), (&second, &res2));
+}