diff --git a/crates/postgresql-cst-parser/src/tree_sitter.rs b/crates/postgresql-cst-parser/src/tree_sitter.rs index 1bac64b..b2893bd 100644 --- a/crates/postgresql-cst-parser/src/tree_sitter.rs +++ b/crates/postgresql-cst-parser/src/tree_sitter.rs @@ -2,7 +2,7 @@ mod assert_util; mod convert; -pub use convert::convert_cst; +pub use convert::get_ts_tree_and_range_map; use std::{collections::HashMap, fmt::Display, rc::Rc}; @@ -30,50 +30,32 @@ pub struct TreeCursor<'a> { node_or_token: NodeOrToken<'a>, } +// https://github.com/tree-sitter/tree-sitter/blob/90666c951d53c13cc6cf5002d971a6debed74244/lib/binding_rust/lib.rs#L74-L78 #[derive(Debug, Clone)] -pub struct Range { - pub start_row: usize, - pub start_col: usize, - pub end_row: usize, - pub end_col: usize, -} - -fn is_flatten_all(node_or_token: NodeOrToken) -> bool { - matches!( - node_or_token.kind(), - SyntaxKind::parse_toplevel - | SyntaxKind::stmtmulti - | SyntaxKind::toplevel_stmt - | SyntaxKind::stmt - | SyntaxKind::select_clause - | SyntaxKind::select_with_parens - | SyntaxKind::select_no_parens - | SyntaxKind::simple_select - | SyntaxKind::opt_target_list - // | SyntaxKind::relation_expr - // | SyntaxKind::extended_relation_expr - // | SyntaxKind::qualified_name - // | SyntaxKind::indirection - // | SyntaxKind::indirection_el - // | SyntaxKind::table_ref - | SyntaxKind::alias_clause - | SyntaxKind::opt_alias_clause - ) +pub struct Point { + pub row: usize, + pub column: usize, } -fn is_flatten_except_top(node_or_token: NodeOrToken) -> bool { - matches!( - node_or_token.kind(), - SyntaxKind::target_list | SyntaxKind::from_list - ) && node_or_token.parent().unwrap().kind() == node_or_token.kind() +impl std::fmt::Display for Point { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "({}, {})", self.row, self.column) + } } -fn is_flatten(node_or_token: NodeOrToken) -> bool { - is_flatten_all(node_or_token) || is_flatten_except_top(node_or_token) +// https://github.com/tree-sitter/tree-sitter/blob/90666c951d53c13cc6cf5002d971a6debed74244/lib/binding_rust/lib.rs#L80-L88 +#[derive(Debug, Clone)] +pub struct Range { + start_byte: usize, + end_byte: usize, + start_position: Point, + end_position: Point, } -fn is_skip(kind: SyntaxKind) -> bool { - matches!(kind, SyntaxKind::Whitespace) +impl std::fmt::Display for Range { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "[{}-{}]", self.start_position, self.end_position) + } } impl<'a> Node<'a> { @@ -93,19 +75,35 @@ impl<'a> Node<'a> { .unwrap() } + pub fn start_position(&self) -> Point { + self.range().start_position + } + + pub fn end_position(&self) -> Point { + self.range().end_position + } + pub fn text(&self) -> &'a str { - // self.node_or_token - // .as_token() - // .map(|t| t.text()) - // .unwrap_or_default() - let start = self.node_or_token.text_range().start().into(); - let end = self.node_or_token.text_range().end().into(); - &self.input[start..end] + let Range { + start_byte, + end_byte, + .. + } = self.range(); + + &self.input[start_byte..end_byte] } - pub fn children(&self) {} + pub fn utf8_text() { + unimplemented!() + } - pub fn child_count(&self) {} + pub fn child_count(&self) -> usize { + if let Some(node) = self.node_or_token.as_node() { + node.children_with_tokens().count() + } else { + 0 + } + } pub fn next_sibling(&self) -> Option> { self.node_or_token @@ -133,100 +131,48 @@ impl<'a> From> for TreeCursor<'a> { } impl<'a> TreeCursor<'a> { - pub fn goto_first_child(&mut self) -> bool { - if self.node_or_token.as_node().is_none() { - return false; - } - - let mut cursor = self.clone(); - - // TODO 書き捨てコードなのでリファクタ - loop { - if let Some(node) = cursor.node_or_token.as_node() { - if let Some(child) = node.first_child_or_token() { - cursor.node_or_token = child; - - if is_skip(child.kind()) || is_flatten(child) { - continue; - } - - self.node_or_token = cursor.node_or_token; - return true; - } - } - if let Some(sibling) = cursor.node_or_token.next_sibling_or_token() { - cursor.node_or_token = sibling; - - if is_skip(sibling.kind()) || is_flatten(sibling) { - continue; - } - - return true; - } else { - cursor.node_or_token = NodeOrToken::Node(cursor.node_or_token.parent().unwrap()); - } + pub fn node(&self) -> Node<'a> { + Node { + input: self.input, + range_map: Rc::clone(&self.range_map), + node_or_token: self.node_or_token, } } - pub fn goto_next_sibling(&mut self) -> bool { - let mut cursor = self.clone(); - - loop { - while let Some(sibling) = cursor.node_or_token.next_sibling_or_token() { - cursor.node_or_token = sibling; - - if is_skip(sibling.kind()) { - continue; - } - - if is_flatten(sibling) { - cursor.goto_first_child(); - } - - self.node_or_token = cursor.node_or_token; + pub fn goto_first_child(&mut self) -> bool { + if let Some(current_node) = self.node_or_token.as_node() { + if let Some(child) = current_node.first_child_or_token() { + self.node_or_token = child; return true; } - - if let Some(parent) = cursor.node_or_token.parent() { - if !is_flatten(NodeOrToken::Node(parent)) { - return false; - } - - cursor.node_or_token = NodeOrToken::Node(parent); - } else { - return false; - } } + false } - pub fn goto_direct_prev_sibling(&mut self) -> bool { - if let Some(prev) = self.node_or_token.prev_sibling_or_token() { - self.node_or_token = prev; + pub fn goto_parent(&mut self) -> bool { + if let Some(parent) = self.node_or_token.parent() { + self.node_or_token = NodeOrToken::Node(parent); true } else { false } } - pub fn goto_parent(&mut self) -> bool { - while let Some(parent) = self.node_or_token.parent() { - self.node_or_token = NodeOrToken::Node(parent); - - if is_flatten(self.node_or_token) { - continue; - } - - return true; + pub fn goto_next_sibling(&mut self) -> bool { + if let Some(sibling) = self.node_or_token.next_sibling_or_token() { + self.node_or_token = sibling; + true + } else { + false } - - false } - pub fn node(&self) -> Node<'a> { - Node { - input: self.input, - range_map: Rc::clone(&self.range_map), - node_or_token: self.node_or_token, + pub fn goto_direct_prev_sibling(&mut self) -> bool { + if let Some(prev) = self.node_or_token.prev_sibling_or_token() { + self.node_or_token = prev; + true + } else { + false } } @@ -238,50 +184,11 @@ impl<'a> TreeCursor<'a> { } } -pub fn as_tree_sitter_cursor<'a>(input: &'a str, node: &'a ResolvedNode) -> TreeCursor<'a> { - let mut range_map = HashMap::new(); - - let new_line_indices: Vec<_> = input - .char_indices() - .filter(|&(_, c)| c == '\n') - .map(|(i, _)| i) - .collect(); - - traverse_pre_order(node, |node_or_token| { - let text_range = node_or_token.text_range(); - - let before_start_new_line_count = - match new_line_indices.binary_search(&text_range.start().into()) { - Ok(i) => i, - Err(i) => i, - }; - - let before_end_new_line_count = - match new_line_indices.binary_search(&text_range.end().into()) { - Ok(i) => i, - Err(i) => i, - }; - - range_map.insert( - node_or_token.text_range(), - Range { - start_row: before_start_new_line_count, - start_col: usize::from(node_or_token.text_range().start()) - - match before_start_new_line_count { - 0 => 0, - i => new_line_indices[i - 1] + 1, - }, - end_row: before_end_new_line_count, - end_col: usize::from(node_or_token.text_range().end()) - - 1 - - match before_end_new_line_count { - 0 => 0, - i => new_line_indices[i - 1], - }, - }, - ); - }); - +pub fn as_tree_sitter_cursor<'a>( + input: &'a str, + node: &'a ResolvedNode, + range_map: HashMap, +) -> TreeCursor<'a> { TreeCursor { input, range_map: Rc::new(range_map), @@ -289,40 +196,9 @@ pub fn as_tree_sitter_cursor<'a>(input: &'a str, node: &'a ResolvedNode) -> Tree } } -fn traverse_pre_order(node: &ResolvedNode, mut f: F) { - let mut node_or_token = NodeOrToken::Node(node); - - loop { - f(node_or_token); - - if let Some(node) = node_or_token.as_node() { - if let Some(child) = node.first_child_or_token() { - node_or_token = child; - continue; - } - } - - if let Some(sibling) = node_or_token.next_sibling_or_token() { - node_or_token = sibling; - } else { - loop { - if let Some(parent) = node_or_token.parent() { - node_or_token = NodeOrToken::Node(parent); - } else { - return; - } - - if let Some(sibling) = node_or_token.next_sibling_or_token() { - node_or_token = sibling; - break; - } - } - } - } -} - pub fn dump_as_tree_sitter_like(input: &str, node: &ResolvedNode) { - let mut cursor = as_tree_sitter_cursor(input, node); + let (node, range_map) = get_ts_tree_and_range_map(input, node); + let mut cursor = as_tree_sitter_cursor(input, &node, range_map); let mut depth = 0; loop { @@ -348,7 +224,14 @@ pub fn dump_as_tree_sitter_like(input: &str, node: &ResolvedNode) { #[cfg(test)] mod tests { - use crate::{cst, tree_sitter::dump_as_tree_sitter_like, ParseError}; + use crate::{ + cst, parse, + syntax_kind::SyntaxKind, + tree_sitter::{ + as_tree_sitter_cursor, dump_as_tree_sitter_like, get_ts_tree_and_range_map, TreeCursor, + }, + ParseError, + }; #[test] fn test() -> Result<(), ParseError> { @@ -362,10 +245,299 @@ FROM , B"#; // dbg!(input); let node = cst::parse(input)?; - dbg!(&node); + // dbg!(&node); dump_as_tree_sitter_like(input, &node); Ok(()) } + + #[test] + fn tree_sitter_like_traverse() { + const UNIT: usize = 2; + + fn visit(cursor: &mut TreeCursor, depth: usize, src: &str) { + (0..(depth * UNIT)).for_each(|_| print!("-")); + + print!("{}", cursor.node().kind()); + + if cursor.node().child_count() == 0 { + // print!(" \"{}\"", cursor.node().utf8_text(src.as_bytes()).unwrap()); // tree-sitter style + print!(" \"{}\"", cursor.node().text().escape_default()); // postgresql-cst-parser style + } + println!( + // " [{}-{}]", + // cursor.node().start_position(), + // cursor.node().end_position() + " {}", + cursor.node().range() + ); + + // 子供を走査 + if cursor.goto_first_child() { + visit(cursor, depth + 1, src); + while cursor.goto_next_sibling() { + visit(cursor, depth + 1, src); + } + cursor.goto_parent(); + } + } + + let src = r#" +-- comment +SELECT + 1 as X +, 2 -- comment +, 3 +FROM + A +, B +; +select + 1 +, 2 +; + +"#; + + let node = parse(&src).unwrap(); + let (node, range_map) = get_ts_tree_and_range_map(&src, &node); + let mut cursor = as_tree_sitter_cursor(src, &node, range_map); + + visit(&mut cursor, 0, &src); + } + + #[test] + fn goto_first_child_from_node() { + let src = "select a, b, c from tbl;"; + let (root, range_map) = get_ts_tree_and_range_map(&src, &parse(&src).unwrap()); + let first_select = root + .descendants() + .find(|x| x.kind() == SyntaxKind::SelectStmt) + .unwrap(); + + let mut cursor = as_tree_sitter_cursor(src, &first_select, range_map); + assert_eq!(cursor.node().kind(), SyntaxKind::SelectStmt); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), SyntaxKind::SELECT); + } + + #[test] + fn goto_first_child_from_token() { + let src = "select a, b, c from tbl;"; + let (root, range_map) = get_ts_tree_and_range_map(&src, &parse(&src).unwrap()); + let column_id_node = root + .descendants() + .find(|x| x.kind() == SyntaxKind::ColId) + .unwrap(); + + let mut cursor = as_tree_sitter_cursor(&src, column_id_node, range_map); + cursor.goto_first_child(); + assert_eq!(cursor.node().kind(), SyntaxKind::IDENT); + + assert!(!cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), SyntaxKind::IDENT); + } + + #[test] + fn goto_parent_from_root() { + let src = "select a, b, c from tbl;"; + let (root, range_map) = get_ts_tree_and_range_map(&src, &parse(&src).unwrap()); + + let mut cursor = as_tree_sitter_cursor(src, &root, range_map); + + assert_eq!(cursor.node().kind(), SyntaxKind::Root); + assert!(!cursor.goto_parent()); + assert_eq!(cursor.node().kind(), SyntaxKind::Root); + } + + #[test] + fn goto_parent_from_node() { + let src = "select a, b, c from tbl;"; + let (root, range_map) = get_ts_tree_and_range_map(&src, &parse(&src).unwrap()); + + let target_element = root + .descendants() + .find(|x| x.kind() == SyntaxKind::target_el) + .unwrap(); + let mut cursor = as_tree_sitter_cursor(src, &target_element, range_map); + assert_eq!(cursor.node().kind(), SyntaxKind::target_el); + + assert!(cursor.goto_parent()); + assert_eq!(cursor.node().kind(), SyntaxKind::target_list); + } + + #[test] + fn goto_parent_from_token() { + let src = "select a, b, c from tbl;"; + let (root, range_map) = get_ts_tree_and_range_map(&src, &parse(&src).unwrap()); + + let column_id_node = root + .descendants() + .find(|x| x.kind() == SyntaxKind::ColId) + .unwrap(); + let mut cursor = as_tree_sitter_cursor(src, &column_id_node, range_map); + + cursor.goto_first_child(); + assert_eq!(cursor.node().kind(), SyntaxKind::IDENT); + + assert!(cursor.goto_parent()); + assert_eq!(cursor.node().kind(), SyntaxKind::ColId); + } + + #[test] + fn goto_next_sibling() { + let src = "select a,b,c from tbl;"; + let (root, range_map) = get_ts_tree_and_range_map(&src, &parse(&src).unwrap()); + + let target_element = root + .descendants() + .find(|x| x.kind() == SyntaxKind::target_el) + .unwrap(); + let mut cursor = as_tree_sitter_cursor(src, &target_element, range_map); + // + // - target_list + // - target_el (1) + // - Comma "," + // - target_el (2) + // - Comma "," + // - target_el (3) + // + + // 1 + assert_eq!(cursor.node().kind(), SyntaxKind::target_el); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), SyntaxKind::Comma); + + // 2 + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), SyntaxKind::target_el); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), SyntaxKind::Comma); + + // 3 + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), SyntaxKind::target_el); + + // No more siblings + assert!(!cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), SyntaxKind::target_el); + } + + #[test] + fn range() { + let src = r#" +-- comment +SELECT + 1 as X +, 2 -- comment 2 +, 3 +FROM + A +, B"#; + + let node = parse(&src).unwrap(); + let (node, range_map) = get_ts_tree_and_range_map(&src, &node); + + let mut cursor = as_tree_sitter_cursor(&src, &node, range_map); + let mut text_buf = String::from("\n"); + + 'traverse: loop { + if cursor.node().child_count() == 0 { + text_buf.push_str(&format!("{}\n", cursor.node().range())); + } + + if cursor.goto_first_child() { + } else if cursor.goto_next_sibling() { + } else { + loop { + if !cursor.goto_parent() { + break 'traverse; + } + + if cursor.goto_next_sibling() { + break; + } + } + } + } + + let expected = r#" +[(1, 0)-(1, 10)] +[(2, 0)-(2, 6)] +[(3, 1)-(3, 2)] +[(3, 3)-(3, 5)] +[(3, 6)-(3, 7)] +[(4, 0)-(4, 1)] +[(4, 2)-(4, 3)] +[(4, 4)-(4, 16)] +[(5, 0)-(5, 1)] +[(5, 2)-(5, 3)] +[(6, 0)-(6, 4)] +[(7, 1)-(7, 2)] +[(8, 0)-(8, 1)] +[(8, 2)-(8, 3)] +"#; + + assert_eq!(text_buf, expected); + } + + #[test] + fn texts() { + let src = r#" +-- comment +SELECT + 1 as X +, 2 -- comment 2 +, 3 +FROM + A +, B"#; + + let node = parse(&src).unwrap(); + let (node, range_map) = get_ts_tree_and_range_map(&src, &node); + + let mut cursor = as_tree_sitter_cursor(&src, &node, range_map); + let mut text_buf = Vec::new(); + + 'traverse: loop { + if cursor.node().child_count() == 0 { + text_buf.push(cursor.node().text()); + } + + if cursor.goto_first_child() { + } else if cursor.goto_next_sibling() { + } else { + loop { + if !cursor.goto_parent() { + break 'traverse; + } + + if cursor.goto_next_sibling() { + break; + } + } + } + } + + let mut text_buf = text_buf.iter(); + assert_eq!(text_buf.next(), Some(&"-- comment")); + assert_eq!(text_buf.next(), Some(&"SELECT")); + assert_eq!(text_buf.next(), Some(&"1")); + assert_eq!(text_buf.next(), Some(&"as")); + assert_eq!(text_buf.next(), Some(&"X")); + assert_eq!(text_buf.next(), Some(&",")); + assert_eq!(text_buf.next(), Some(&"2")); + assert_eq!(text_buf.next(), Some(&"-- comment 2")); + assert_eq!(text_buf.next(), Some(&",")); + assert_eq!(text_buf.next(), Some(&"3")); + assert_eq!(text_buf.next(), Some(&"FROM")); + assert_eq!(text_buf.next(), Some(&"A")); + assert_eq!(text_buf.next(), Some(&",")); + assert_eq!(text_buf.next(), Some(&"B")); + assert_eq!(text_buf.next(), None); + } } diff --git a/crates/postgresql-cst-parser/src/tree_sitter/convert.rs b/crates/postgresql-cst-parser/src/tree_sitter/convert.rs index bdeb8ab..d367dae 100644 --- a/crates/postgresql-cst-parser/src/tree_sitter/convert.rs +++ b/crates/postgresql-cst-parser/src/tree_sitter/convert.rs @@ -1,40 +1,177 @@ +use std::collections::HashMap; + use cstree::{build::GreenNodeBuilder, syntax::SyntaxNode}; -use crate::{syntax_kind::SyntaxKind, PostgreSQLSyntax, ResolvedNode}; +use crate::{syntax_kind::SyntaxKind, NodeOrToken, PostgreSQLSyntax, ResolvedNode}; + +use super::Point; + +type SequentialRange = cstree::text::TextRange; // Range representation by cstree (Sequential bytes) +type RowColumnRange = super::Range; // tree-sitter like range representation (Rows and Columns) -/// Converts the given CST into a node structure and hierarchy that closely matches what `tree-sitter-sql` produces. -pub fn convert_cst(root: &ResolvedNode) -> ResolvedNode { +pub fn get_ts_tree_and_range_map( + src: &str, + root: &ResolvedNode, +) -> (ResolvedNode, HashMap) { let mut builder = GreenNodeBuilder::new(); + let mut row_column_ranges: Vec = vec![]; - // Build `Root` node - builder.start_node(SyntaxKind::Root); - walk_and_build(&mut builder, root); - builder.finish_node(); + // Build new tree, and Collect row-column style Ranges + { + let new_line_indices: Vec<_> = src + .char_indices() + .filter(|&(_, c)| c == '\n') + .map(|(i, _)| i) + .collect(); + + row_column_ranges.push(get_row_column_range( + &NodeOrToken::Node(root), + &new_line_indices, + )); + + builder.start_node(SyntaxKind::Root); + // process subtrees + // These Nodes will be ignored: + // - Unneeded node + // - Nested node + // - Whitespace token + // + // Each Node in the tree: + // 1. Add new Node (or Token) to New Tree + // 2. Create tree-sitter compatible `Range`s based on the original text. + walk_and_build( + root, + &new_line_indices, + &mut builder, + &mut row_column_ranges, + ); + builder.finish_node(); + } + // Get New tree let (tree, cache) = builder.finish(); + let new_root = + SyntaxNode::new_root_with_resolver(tree, cache.unwrap().into_interner().unwrap()); + + // Create a mapping between the TextRanges of nodes and tokens (in bytes) and the original text ranges (in rows and columns). + let range_map = create_mapping(&new_root, row_column_ranges); + + (new_root, range_map) +} + +fn get_row_column_range(node_or_token: &NodeOrToken, new_line_indices: &[usize]) -> RowColumnRange { + let text_range: SequentialRange = node_or_token.text_range(); + + let before_start_new_line_count = + match new_line_indices.binary_search(&text_range.start().into()) { + Ok(i) => i, + Err(i) => i, + }; + + let before_end_new_line_count = match new_line_indices.binary_search(&text_range.end().into()) { + Ok(i) => i, + Err(i) => i, + }; + + let start_position = Point { + row: before_start_new_line_count, + column: usize::from(text_range.start()) + - match before_start_new_line_count { + 0 => 0, + i => new_line_indices[i - 1] + 1, + }, + }; + + let end_position = Point { + row: before_end_new_line_count, + column: usize::from(text_range.end()) + - 1 + - match before_end_new_line_count { + 0 => 0, + i => new_line_indices[i - 1], + }, + }; - SyntaxNode::new_root_with_resolver(tree, cache.unwrap().into_interner().unwrap()) + RowColumnRange { + start_byte: text_range.start().into(), + end_byte: text_range.end().into(), + start_position, + end_position, + } +} + +fn traverse_pre_order(node: &ResolvedNode, mut f: F) { + let mut node_or_token = NodeOrToken::Node(node); + + loop { + f(node_or_token); + + if let Some(node) = node_or_token.as_node() { + if let Some(child) = node.first_child_or_token() { + node_or_token = child; + continue; + } + } + + if let Some(sibling) = node_or_token.next_sibling_or_token() { + node_or_token = sibling; + } else { + loop { + if let Some(parent) = node_or_token.parent() { + node_or_token = NodeOrToken::Node(parent); + } else { + return; + } + + if let Some(sibling) = node_or_token.next_sibling_or_token() { + node_or_token = sibling; + break; + } + } + } + } +} + +fn create_mapping( + root: &ResolvedNode, + row_column_ranges: Vec, +) -> HashMap { + assert_eq!( + root.descendants_with_tokens().count(), + row_column_ranges.len() + ); + + let mut range_map: HashMap = HashMap::new(); + let mut range_iter = row_column_ranges.iter(); + traverse_pre_order(root, |node_or_token| { + if let Some(original_range) = range_iter.next() { + let byte_range = node_or_token.text_range(); + range_map.insert(byte_range, original_range.clone()); + } + }); + + assert!(range_iter.next().is_none()); + range_map } -/// Traverse the CST and rewrite certain nodes -/// e.g. flatten list node, remove option node fn walk_and_build( - builder: &mut GreenNodeBuilder<'static, 'static, PostgreSQLSyntax>, node: &ResolvedNode, + new_line_indices: &Vec, + builder: &mut GreenNodeBuilder<'static, 'static, PostgreSQLSyntax>, + row_column_ranges: &mut Vec, ) { use cstree::util::NodeOrToken; + let parent_kind = node.kind(); let children = node.children_with_tokens(); for child in children { match child { - NodeOrToken::Node(n) => { - match n.kind() { - child_kind @ (SyntaxKind::stmtmulti - | SyntaxKind::target_list - | SyntaxKind::from_list) => { + NodeOrToken::Node(child_node) => { + match child_node.kind() { + child_kind @ (SyntaxKind::target_list | SyntaxKind::from_list) => { if parent_kind == child_kind { - // [Flatten] + // [Node: Flatten] // // This patten does not construct node. // @@ -44,17 +181,39 @@ fn walk_and_build( // +- target_el // +- ... // - walk_and_build(builder, n); + walk_and_build( + child_node, + new_line_indices, + builder, + row_column_ranges, + ); } else { // Node is target for flattening, but at the top level of the nest - builder.start_node(n.kind()); - walk_and_build(builder, n); + + row_column_ranges.push(get_row_column_range( + &NodeOrToken::Node(child_node), + new_line_indices, + )); + + builder.start_node(child_node.kind()); + walk_and_build( + child_node, + new_line_indices, + builder, + row_column_ranges, + ); builder.finish_node(); } } - SyntaxKind::opt_target_list => { - // [Removal] + SyntaxKind::parse_toplevel + | SyntaxKind::stmtmulti + | SyntaxKind::toplevel_stmt + | SyntaxKind::stmt + | SyntaxKind::select_no_parens + | SyntaxKind::simple_select + | SyntaxKind::opt_target_list => { + // [Node: Removal] // // Ignore current node, and continue building its children. // @@ -64,19 +223,36 @@ fn walk_and_build( // +- child_1 +- child_2 // +- child_1 // - walk_and_build(builder, n); + walk_and_build(child_node, new_line_indices, builder, row_column_ranges); } - // Default pattern + // [Node: Default] _ => { - builder.start_node(n.kind()); - walk_and_build(builder, n); + row_column_ranges.push(get_row_column_range( + &NodeOrToken::Node(child_node), + new_line_indices, + )); + builder.start_node(child_node.kind()); + walk_and_build(child_node, new_line_indices, builder, row_column_ranges); builder.finish_node(); } } } - NodeOrToken::Token(t) => { - builder.token(t.kind(), t.text()); + NodeOrToken::Token(child_token) => { + // [Token: Removal] + // Note: + // This process will break the lossless property of the CST. + // `text()` for Nodes and `text_range()` for Nodes and Tokens will become incompatible with the original text. + if child_token.kind() == SyntaxKind::Whitespace { + continue; + } + + // [Token: Default] + row_column_ranges.push(get_row_column_range( + &NodeOrToken::Token(child_token), + new_line_indices, + )); + builder.token(child_token.kind(), child_token.text()); } } } @@ -84,12 +260,11 @@ fn walk_and_build( #[cfg(test)] mod tests { - use crate::{cst, tree_sitter::convert::convert_cst}; + use crate::{cst, tree_sitter::convert::get_ts_tree_and_range_map}; #[test] - /// Assert that the CST is not broken by the conversion. - fn restored_texts_are_equal() { - let input = r#" + fn whitespace_is_removed() { + let original = r#" SELECT 1 as X , 2 @@ -98,11 +273,12 @@ FROM A , B"#; - let root = cst::parse(input).unwrap(); - let new_root = convert_cst(&root); + let root = cst::parse(&original).unwrap(); + let (new_root, _) = get_ts_tree_and_range_map(&original, &root); - // format!("{ResolvedNode}") returns original input str. - assert_eq!(format!("{root}"), format!("{new_root}")); + let whitespace_removed: String = original.split_whitespace().collect(); + // Lossless property of the CST is broken. + assert_eq!(new_root.text(), whitespace_removed.as_str()); } mod removal { @@ -111,7 +287,7 @@ FROM syntax_kind::SyntaxKind, tree_sitter::{ assert_util::{assert_exists, assert_not_exists}, - convert::convert_cst, + convert::get_ts_tree_and_range_map, }, }; @@ -121,7 +297,7 @@ FROM let root = cst::parse(input).unwrap(); assert_exists(&root, SyntaxKind::opt_target_list); - let new_root = convert_cst(&root); + let (new_root, _) = get_ts_tree_and_range_map(input, &root); assert_not_exists(&new_root, SyntaxKind::opt_target_list); } } @@ -132,7 +308,7 @@ FROM syntax_kind::SyntaxKind, tree_sitter::{ assert_util::{assert_no_direct_nested_kind, assert_node_count}, - convert::convert_cst, + convert::get_ts_tree_and_range_map, }, }; @@ -143,7 +319,7 @@ FROM let root = cst::parse(input).unwrap(); assert_node_count(&root, SyntaxKind::target_list, 3); - let new_root = convert_cst(&root); + let (new_root, _) = get_ts_tree_and_range_map(input, &root); assert_node_count(&new_root, SyntaxKind::target_list, 1); assert_no_direct_nested_kind(&new_root, SyntaxKind::target_list); } @@ -152,7 +328,7 @@ FROM fn no_nested_stmtmulti() { let input = "select a,b,c;\nselect d,e from t;"; let root = cst::parse(input).unwrap(); - let new_root = convert_cst(&root); + let (new_root, _) = get_ts_tree_and_range_map(input, &root); assert_no_direct_nested_kind(&new_root, SyntaxKind::stmtmulti); } @@ -161,7 +337,7 @@ FROM fn no_nested_from_list() { let input = "select * from t1, t2;"; let root = cst::parse(input).unwrap(); - let new_root = convert_cst(&root); + let (new_root, _) = get_ts_tree_and_range_map(&input, &root); assert_no_direct_nested_kind(&new_root, SyntaxKind::from_list); }