diff --git a/crates/core/src/pattern_compiler/pattern_compiler.rs b/crates/core/src/pattern_compiler/pattern_compiler.rs index a65fec33f..7e055db47 100644 --- a/crates/core/src/pattern_compiler/pattern_compiler.rs +++ b/crates/core/src/pattern_compiler/pattern_compiler.rs @@ -74,9 +74,18 @@ impl PatternCompiler { context: &mut dyn SnippetCompilationContext, is_rhs: bool, ) -> Result> { + println!("\n=== Starting from_snippet_node ==="); + println!("Node kind: {}", node.node.kind()); + println!("Node text: {}", node.text()?); + println!("Context range: {:?}", context_range); + println!("Is RHS: {}", is_rhs); + let snippet_start = node.node.start_byte() as usize; let ranges = metavariable_ranges(&node, context.get_lang()); + println!("Found metavariable ranges: {:?}", ranges); + let range_map = metavariable_range_mapping(ranges, snippet_start); + println!("Range mapping: {:?}", range_map); fn node_to_astnode( node: NodeWithSource, @@ -85,34 +94,50 @@ impl PatternCompiler { context: &mut dyn SnippetCompilationContext, is_rhs: bool, ) -> Result> { + println!("\n-> Processing node: {}", node.node.kind()); + println!(" Text: {}", node.text()?); + let sort = node.node.kind_id(); - // probably safe to assume node is named, but just in case - // maybe doesn't even matter, but is what I expect, - // make this ann assertion? 
+ println!(" Sort ID: {} Value: {}", sort, node.node.kind()); + + // Check for metavariables let metavariable = metavariable_descendent(&node, context_range, range_map, context, is_rhs)?; + println!(" Metavariable check result: {:?}", metavariable.is_some()); if let Some(metavariable) = metavariable { + println!(" -> Returning metavariable pattern"); return Ok(metavariable); } + let language = *context.get_lang(); let node_types = language.node_types(); + // Handle leaf nodes if node_types[sort as usize].is_empty() { + println!(" Processing leaf node"); let content = node.text()?; if (node.node.named_child_count() == 0) && language.replaced_metavariable_regex().is_match(&content) { + println!(" -> Checking for implicit metavariable regex"); let regex = implicit_metavariable_regex(&node, context_range, range_map, context)?; if let Some(regex) = regex { + println!(" -> Returning regex pattern"); return Ok(Pattern::Regex(Box::new(regex))); } } + println!(" -> Returning leaf node pattern"); return Ok(Pattern::AstLeafNode(AstLeafNode::new( sort, &content, &language, )?)); } + + // Handle non-leaf nodes + println!(" Processing non-leaf node with fields"); let fields: &Vec = &node_types[sort as usize]; + println!(" Number of fields: {}", fields.len()); + let args = fields .iter() .filter(|field| { @@ -120,20 +145,29 @@ impl PatternCompiler { .node .child_by_field_id(field.id()) .map(|n| NodeWithSource::new(n, node.source)); - // Then check if it's an empty, optional field + if language.is_disregarded_snippet_field(sort, field.id(), &child_with_source) { + println!(" Skipping disregarded field: {}", field.id()); return false; } - // Otherwise compile it true }) .map(|field| { + println!( + " Processing field ID: {} Name: {}", + field.id(), + field.name() + ); let field_id = field.id(); let mut nodes_list = node .named_children_by_field_id(field_id) .map(|n| node_to_astnode(n, context_range, range_map, context, is_rhs)) .collect::>>>()?; + + println!(" Field nodes count: 
{}", nodes_list.len()); + if !field.multiple() { + println!(" -> Single field pattern"); return Ok(( field_id, false, @@ -146,14 +180,18 @@ impl PatternCompiler { ))), )); } + if nodes_list.len() == 1 && matches!( nodes_list.first(), Some(Pattern::Variable(_)) | Some(Pattern::Underscore) ) { + println!(" -> Single variable/underscore list pattern"); return Ok((field_id, true, nodes_list.pop().unwrap())); } + + println!(" -> Multiple field list pattern"); Ok(( field_id, true, @@ -161,9 +199,15 @@ impl PatternCompiler { )) }) .collect::)>>>()?; + + println!(" -> Returning AST node pattern with {} args", args.len()); Ok(Pattern::AstNode(Box::new(ASTNode { sort, args }))) } - node_to_astnode(node, context_range, &range_map, context, is_rhs) + + let result = node_to_astnode(node, context_range, &range_map, context, is_rhs); + println!("\n=== Completed from_snippet_node ==="); + println!("Result: {:?}\n", result.is_ok()); + result } } diff --git a/crates/core/src/pattern_compiler/snippet_compiler.rs b/crates/core/src/pattern_compiler/snippet_compiler.rs index 3f0e0f7be..4d4b35d15 100644 --- a/crates/core/src/pattern_compiler/snippet_compiler.rs +++ b/crates/core/src/pattern_compiler/snippet_compiler.rs @@ -78,6 +78,11 @@ pub(crate) fn dynamic_snippet_from_source( source_range: ByteRange, context: &mut dyn SnippetCompilationContext, ) -> Result { + println!("\n=== Starting dynamic_snippet_from_source ==="); + println!("Raw source: {}", raw_source); + println!("Source range: {:?}", source_range); + + // Process escape sequences let source_string = raw_source .replace("\\n", "\n") .replace("\\$", "$") @@ -85,25 +90,54 @@ pub(crate) fn dynamic_snippet_from_source( .replace("\\`", "`") .replace("\\\"", "\"") .replace("\\\\", "\\"); + println!("After escape processing: {}", source_string); + let source = source_string.as_str(); + + // Find all metavariables in the source let metavariables = split_snippet(source, context.get_lang()); + println!("Found {} metavariables:", 
metavariables.len()); + for (range, var) in &metavariables { + println!(" - {} at range {:?}", var, range); + } + + // Create parts alternating between string literals and variables let mut parts = Vec::with_capacity(2 * metavariables.len() + 1); let mut last = 0; - // Reverse the iterator so we go over the variables in ascending order. + + // Process metavariables in reverse order to maintain correct positions + println!("\nProcessing parts:"); for (byte_range, var) in metavariables.into_iter().rev() { - parts.push(DynamicSnippetPart::String( - source[last..byte_range.start].to_string(), - )); + // Add text before the variable + let prefix = &source[last..byte_range.start]; + println!("Adding string part: {:?}", prefix); + parts.push(DynamicSnippetPart::String(prefix.to_string())); + + // Calculate variable range in original source let range = ByteRange::new( source_range.start + byte_range.start, source_range.start + byte_range.start + var.len(), ); + println!("Processing variable {} at range {:?}", var, range); + + // Register the variable and add it as a part let part = context.register_snippet_variable(&var, Some(range))?; + println!("Added variable part: {:?}", part); parts.push(part); + last = byte_range.end; } - parts.push(DynamicSnippetPart::String(source[last..].to_string())); - Ok(DynamicSnippet { parts }) + + // Add remaining text after last variable + let remaining = &source[last..]; + println!("Adding final string part: {:?}", remaining); + parts.push(DynamicSnippetPart::String(remaining.to_string())); + + println!("\nFinal DynamicSnippet has {} parts", parts.len()); + println!("=== Completed dynamic_snippet_from_source ===\n"); + let snippet = DynamicSnippet { parts }; + println!("{:#?}", &snippet); + Ok(snippet) } pub(crate) fn parse_snippet_content( @@ -112,68 +146,98 @@ pub(crate) fn parse_snippet_content( context: &mut dyn SnippetCompilationContext, is_rhs: bool, ) -> Result> { - // we check for CURLY_VAR_REGEX in the content, and if found - // 
compile into a DynamicPattern, rather than a CodeSnippet. - // This is because the syntax should only ever be necessary - // when treating a metavariable as a string to substitute - // rather than an AST node to match on. eg. in the following - // `const ${name}Handler = useCallback(async () => $body, []);` - // $name does not correspond to a node, but rather prepends a - // string to "Handler", which will together combine into an - // identifier. - if context + println!("\n=== Starting parse_snippet_content ==="); + println!("Source: {}", source); + println!("Range: {:?}", range); + println!("Is RHS: {}", is_rhs); + + // Check for bracketed metavariables like ${name} + let has_bracketed_vars = context .get_lang() .metavariable_bracket_regex() - .is_match(source) - { + .is_match(source); + println!("Has bracketed variables: {}", has_bracketed_vars); + + if has_bracketed_vars { + println!("Processing bracketed metavariables pattern"); if is_rhs { - Ok(Pattern::Dynamic( + println!("-> Creating dynamic pattern for RHS"); + return Ok(Pattern::Dynamic( dynamic_snippet_from_source(source, range, context).map(DynamicPattern::Snippet)?, - )) + )); } else { + println!("-> Error: bracketed vars not allowed on LHS"); bail!("bracketed metavariables are only allowed on the rhs of a snippet"); } - } else { - if context - .get_lang() - .exact_variable_regex() - .is_match(source.trim()) - { - match source.trim() { - "$_" => return Ok(Pattern::Underscore), - "^_" => return Ok(Pattern::Underscore), - name => { - let var = context.register_variable(name, Some(range))?; - return Ok(Pattern::Variable(var)); - } + } + + // Check for single metavariable patterns + let is_exact_variable = context + .get_lang() + .exact_variable_regex() + .is_match(source.trim()); + println!("Is exact variable match: {}", is_exact_variable); + + if is_exact_variable { + println!("Processing exact variable pattern: {}", source.trim()); + match source.trim() { + "$_" => { + println!("-> Returning Underscore 
pattern"); + return Ok(Pattern::Underscore); + } + "^_" => { + println!("-> Returning Underscore pattern"); + return Ok(Pattern::Underscore); + } + name => { + println!("-> Creating Variable pattern for: {}", name); + let var = context.register_variable(name, Some(range))?; + return Ok(Pattern::Variable(var)); } } - let snippet_trees = context.get_lang().parse_snippet_contexts(source); - let snippet_nodes = nodes_from_indices(&snippet_trees); - if snippet_nodes.is_empty() { - // not checking if is_rhs. So could potentially - // be harder to find bugs where we expect the pattern - // to parse. unfortunately got rid of check to support - // passing non-node snippets as args. - return Ok(Pattern::Dynamic( - dynamic_snippet_from_source(source, range, context).map(DynamicPattern::Snippet)?, - )); - } - let snippet_patterns: Vec<(SortId, Pattern)> = snippet_nodes - .into_iter() - .map(|node| { - Ok(( - node.node.kind_id(), - PatternCompiler::from_snippet_node(node, range, context, is_rhs)?, - )) - }) - .collect::)>>>()?; - let dynamic_snippet = dynamic_snippet_from_source(source, range, context) - .map_or(None, |s| Some(DynamicPattern::Snippet(s))); - Ok(Pattern::CodeSnippet(MarzanoCodeSnippet::new( - snippet_patterns, - dynamic_snippet, - source, - ))) } + + // Parse regular code snippet + println!("Parsing snippet as code..."); + let snippet_trees = context.get_lang().parse_snippet_contexts(source); + //print snippet trees + println!("snippet_trees: {:#?}", snippet_trees); + + let snippet_nodes = nodes_from_indices(&snippet_trees); + println!("Number of parsed nodes: {}", snippet_nodes.len()); + + if snippet_nodes.is_empty() { + println!("No AST nodes found - creating dynamic snippet pattern"); + return Ok(Pattern::Dynamic( + dynamic_snippet_from_source(source, range, context).map(DynamicPattern::Snippet)?, + )); + } + + println!("Processing {} AST nodes", snippet_nodes.len()); + let snippet_patterns: Vec<(SortId, Pattern)> = snippet_nodes + .into_iter() + .map(|node| 
{ + println!("Processing node kind: {}", node.node.kind()); + Ok(( + node.node.kind_id(), + PatternCompiler::from_snippet_node(node, range, context, is_rhs)?, + )) + }) + .collect::)>>>()?; + + println!("Creating dynamic snippet"); + let dynamic_snippet = dynamic_snippet_from_source(source, range, context) + .map_or(None, |s| Some(DynamicPattern::Snippet(s))); + + println!( + "-> Returning CodeSnippet pattern with {} patterns", + snippet_patterns.len() + ); + println!("=== Completed parse_snippet_content ===\n"); + + Ok(Pattern::CodeSnippet(MarzanoCodeSnippet::new( + snippet_patterns, + dynamic_snippet, + source, + ))) } diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 1aa8f7f32..5eb0eda7a 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -294,6 +294,8 @@ impl Parser for MarzanoParser { } } +use tree_sitter::Node; + #[enum_dispatch] pub trait MarzanoLanguage<'a>: Language = NodeWithSource<'a>> + NodeTypes { /// tree sitter language to parse the source @@ -304,15 +306,79 @@ pub trait MarzanoLanguage<'a>: Language = NodeWithSource<'a>> + NodeTyp } fn parse_snippet_contexts(&self, source: &str) -> Vec> { + println!("\n=== Starting parse_snippet_contexts ==="); + println!("Original source: {}", source); + + // First substitute $ with µ in metavariables let source = self.substitute_metavariable_prefix(source); - self.snippet_context_strings() + println!("After metavariable substitution: {}", source); + + // Get all available context pairs + let contexts = self.snippet_context_strings(); + println!("\nAvailable contexts ({} total):", contexts.len()); + for (i, (prefix, suffix)) in contexts.iter().enumerate() { + println!(" {}. 
prefix='{}', suffix='{}'", i + 1, prefix, suffix); + } + + let results: Vec> = self + .snippet_context_strings() .iter() - .map(|(pre, post)| self.get_parser().parse_snippet(pre, &source, post)) + .enumerate() + .map(|(i, (pre, post))| { + println!("\nTrying context {}", i + 1); + println!("Wrapping snippet with: prefix='{}', suffix='{}'", pre, post); + + let result = self.get_parser().parse_snippet(pre, &source, post); + let root_node = &result.tree.root_node(); + + println!("Parse result:"); + println!(" Start byte: {}", result.snippet_start); + println!(" End byte: {}", result.snippet_end); + println!(" Tree: {:#?}", root_node.node.to_sexp()); + self.print_tree(root_node, 0); + println!(" Has error: {}", root_node.node.has_error()); + println!(" Is error: {}", root_node.node.is_error()); + println!(" Is missing: {}", root_node.node.is_missing()); + println!(" Root kind: {}", root_node.node.kind()); + + result + }) .filter(|result| { let root_node = &result.tree.root_node().node; - !(root_node.has_error() || root_node.is_error() || root_node.is_missing()) + let is_valid = + !(root_node.has_error() || root_node.is_error() || root_node.is_missing()); + + if is_valid { + println!(" -> Valid parse, keeping this result"); + } else { + println!(" -> Invalid parse, filtering out this result"); + } + + is_valid }) - .collect() + .collect(); + + println!("\nFinal Results:"); + println!("Successfully parsed in {} contexts", results.len()); + for (i, result) in results.iter().enumerate() { + println!( + " {}. 
Root kind: {}", + i + 1, + result.tree.root_node().node.kind() + ); + } + println!("=== Completed parse_snippet_contexts ===\n"); + + results + } + + fn print_tree(&self, node: &NodeWithSource, depth: usize) { + let indent = " ".repeat(depth); + println!("{}{:?}", indent, node.node.kind(),); + + for child in node.children() { + self.print_tree(&child, depth + 1); + } } fn align_padding<'b>( @@ -562,62 +628,136 @@ pub fn nodes_from_indices(indices: &[SnippetTree]) -> Vec } fn snippet_nodes_from_index(snippet: &SnippetTree) -> Option { + println!("\n=== Starting snippet_nodes_from_index ==="); + println!("Snippet source: {}", snippet.source); + println!( + "Snippet range: {}..{}", + snippet.snippet_start, snippet.snippet_end + ); + let mut snippet_root = snippet.tree.root_node(); + println!( + "Initial root node: kind={}, range={}..{}", + snippet_root.node.kind(), + snippet_root.node.start_byte(), + snippet_root.node.end_byte() + ); + if snippet_root.node.is_missing() { + println!("Root node is missing, returning None"); return None; } let mut id = snippet_root.node.id(); + println!("Initial node id: {:?}", id); - // find the the most senior node with the same index as the snippet + // Find the most senior node containing the snippet while snippet_root.node.start_byte() < snippet.snippet_start || snippet_root.node.end_byte() > snippet.snippet_end { - if snippet_root.named_children().count() == 0 { - if snippet_root.text().unwrap().trim() == snippet.source.trim() { + println!( + "\nCurrent node: kind={}, range={}..{}", + snippet_root.node.kind(), + snippet_root.node.start_byte(), + snippet_root.node.end_byte() + ); + + let named_child_count = snippet_root.named_children().count(); + println!("Named children count: {}", named_child_count); + + if named_child_count == 0 { + let root_text = snippet_root.text().unwrap(); + println!("Leaf node found. 
Comparing:"); + println!(" Root text (trimmed): '{}'", root_text.trim()); + println!(" Snippet (trimmed): '{}'", snippet.source.trim()); + + if root_text.trim() == snippet.source.trim() { + println!("-> Exact match found, returning node"); return Some(snippet_root); } else { + println!("-> No match, returning None"); return None; } } + + let mut found_child = false; for child in snippet_root.named_children() { + println!( + "Checking child: kind={}, range={}..{}", + child.node.kind(), + child.node.start_byte(), + child.node.end_byte() + ); + if child.node.start_byte() <= snippet.snippet_start && child.node.end_byte() >= snippet.snippet_end { + println!("-> Found containing child, moving down"); snippet_root = child; + found_child = true; break; } } - // sanity check to avoid infinite loop + + if !found_child { + println!("No containing child found"); + } + + // Infinite loop check if snippet_root.node.id() == id { + println!( + "Same node encountered twice ({}), checking text match", + snippet_root.node.kind() + ); + println!( + "'{}' != '{}'", + snippet_root.text().unwrap().trim(), + snippet.source.trim() + ); if snippet_root.text().unwrap().trim() != snippet.source.trim() { + println!("-> Text doesn't match, returning None"); return None; } + println!("-> Text matches, breaking loop"); break; } id = snippet_root.node.id(); } - // in order to handle white space and other superfluos - // stuff in the snippet we assume the root - // is correct as long as it's the largest node within - // the snippet length. Maybe this is too permissive? 
+ // Handle whitespace and collect nodes + println!("\nCollecting nodes with same range"); let mut nodes = vec![]; let root_start = snippet_root.node.start_byte(); let root_end = snippet_root.node.end_byte(); + println!("Target range: {}..{}", root_start, root_end); + if root_start > snippet.snippet_start || root_end < snippet.snippet_end { + println!("Root node doesn't fully contain snippet, returning None"); return None; } + while snippet_root.node.start_byte() == root_start && snippet_root.node.end_byte() == root_end { + println!("Adding node: kind={}", snippet_root.node.kind()); let first_child = snippet_root.named_children().next(); nodes.push(snippet_root); + if let Some(child) = first_child { - snippet_root = child + println!("Moving to child: kind={}", child.node.kind()); + snippet_root = child; } else { + println!("No more children, breaking"); break; } } - nodes.last().cloned() + + println!("Final nodes collected: {}", nodes.len()); + let result = nodes.last().cloned(); + println!( + "=== Completed snippet_nodes_from_index: found={} ===\n", + result.is_some() + ); + + result } // todo