From 74717cc81e7e9f5b95921ef636a843cff9d1f954 Mon Sep 17 00:00:00 2001 From: Nicolas BACQUEY Date: Mon, 21 Oct 2024 12:11:48 +0200 Subject: [PATCH] WIP: query coverage check --- topiary-cli/src/cli.rs | 7 +++++++ topiary-cli/src/main.rs | 26 +++++++++++++++++++++++++- topiary-core/src/lib.rs | 11 ++++++++++- topiary-core/src/tree_sitter.rs | 30 ++++++++++++++---------------- 4 files changed, 56 insertions(+), 18 deletions(-) diff --git a/topiary-cli/src/cli.rs b/topiary-cli/src/cli.rs index a23b1304..54078995 100644 --- a/topiary-cli/src/cli.rs +++ b/topiary-cli/src/cli.rs @@ -144,6 +144,13 @@ pub enum Commands { #[command(display_order = 4)] Prefetch, + /// Checks how much of the tree-sitter query is used + #[command(display_order = 5)] + Coverage { + #[command(flatten)] + input: ExactlyOneInput, + }, + /// Generate shell completion script #[command(display_order = 100)] Completion { diff --git a/topiary-cli/src/main.rs b/topiary-cli/src/main.rs index 3636df53..ead85f85 100644 --- a/topiary-cli/src/main.rs +++ b/topiary-cli/src/main.rs @@ -6,7 +6,7 @@ mod visualisation; use std::{ error::Error, - io::{BufReader, BufWriter}, + io::{BufReader, BufWriter, empty}, process::ExitCode, }; @@ -147,6 +147,30 @@ async fn run() -> CLIResult<()> { config.prefetch_languages()?; } + Commands::Coverage { input } => { + // We are guaranteed (by clap) to have exactly one input, so it's safe to unwrap + let input = Inputs::new(&config, &input).next().unwrap()?; + + // We don't need a `LanguageDefinitionCache` when there's only one input, + // which saves us the thread-safety overhead + let language = input.to_language().await?; + + log::info!( + "Checking query coverage of {}, as {}", + input.source(), + input.language().name, + ); + + let mut buf_input = BufReader::new(input); + + formatter( + &mut buf_input, + &mut empty(), + &language, + Operation::Coverage, + )?; + } + Commands::Completion { shell } => { // The CLI parser fails if no shell is provided/detected, so it's safe to unwrap here cli::completion(shell.unwrap()); diff --git a/topiary-core/src/lib.rs b/topiary-core/src/lib.rs index cea567fe..9785e9e0 100644 --- a/topiary-core/src/lib.rs +++ b/topiary-core/src/lib.rs @@ -169,6 +169,8 @@ pub enum Operation { /// Choose the type of visualation Topiary should ouput output_format: Visualisation, }, + /// Checks the coverage of the query file by the input + Coverage, } /// The function that takes an input and formats, or visualises an output. @@ -240,7 +242,6 @@ pub fn formatter( &language.query, &language.grammar, tolerate_parsing_errors, - false, )?; // Various post-processing of whitespace @@ -271,6 +272,14 @@ pub fn formatter( Visualisation::Json => serde_json::to_writer(output, &root)?, }; } + + Operation::Coverage => { + let () = tree_sitter::check_query_coverage( + &content, + &language.query, + &language.grammar, + )?; + } }; Ok(()) diff --git a/topiary-core/src/tree_sitter.rs b/topiary-core/src/tree_sitter.rs index 59ddcb47..493347c4 100644 --- a/topiary-core/src/tree_sitter.rs +++ b/topiary-core/src/tree_sitter.rs @@ -202,9 +202,8 @@ pub fn apply_query( query: &TopiaryQuery, grammar: &topiary_tree_sitter_facade::Language, tolerate_parsing_errors: bool, - should_check_input_exhaustivity: bool, ) -> FormatterResult { - let (tree, grammar) = parse(input_content, grammar, tolerate_parsing_errors)?; + let (tree, _grammar) = parse(input_content, grammar, tolerate_parsing_errors)?; let root = tree.root_node(); let source = input_content.as_bytes(); @@ -222,11 +221,6 @@ pub fn apply_query( }); } - if should_check_input_exhaustivity { - let ref_match_count = matches.len(); - check_input_exhaustivity(ref_match_count, query, grammar, &root, source)?; - } - // Find the ids of all tree-sitter nodes that were identified as a leaf // We want to avoid recursing into them in the collect_leafs function. let specified_leaf_nodes: HashSet = collect_leaf_ids(&matches, capture_names.clone()); @@ -500,15 +494,21 @@ fn check_predicates(predicates: &QueryPredicates) -> FormatterResult<()> { /// Check if the input tests all patterns in the query, by successively disabling /// all patterns. If disabling a pattern does not decrease the number of matches, /// then that pattern originally matched nothing in the input. -fn check_input_exhaustivity( - ref_match_count: usize, +pub fn check_query_coverage( + input_content: &str, original_query: &TopiaryQuery, grammar: &topiary_tree_sitter_facade::Language, - root: &Node, - source: &[u8], ) -> FormatterResult<()> { + let (tree, grammar) = parse(input_content, grammar, false)?; + let root = tree.root_node(); + let source = input_content.as_bytes(); + + // Match queries + let mut cursor = QueryCursor::new(); + let ref_match_count = original_query.query.matches(&root, source, &mut cursor).count(); let pattern_count = original_query.query.pattern_count(); let query_content = &original_query.query_content; + // This particular test avoids a SIGSEGV error that occurs when trying // to count the matches of an empty query (see #481) if pattern_count == 1 { @@ -526,7 +526,7 @@ fn check_input_exhaustivity( .map_err(|e| FormatterError::Query("Error parsing query file".into(), Some(e)))?; query.disable_pattern(i); let mut cursor = QueryCursor::new(); - let match_count = query.matches(root, source, &mut cursor).count(); + let match_count = query.matches(&root, source, &mut cursor).count(); if match_count == ref_match_count { let index_start = query.start_byte_for_pattern(i); let index_end = if i == pattern_count - 1 { @@ -542,12 +542,10 @@ fn check_input_exhaustivity( } #[cfg(target_arch = "wasm32")] -fn check_input_exhaustivity( - _ref_match_count: usize, +fn check_query_coverage( + _input_content: &str, _original_query: &TopiaryQuery, _grammar: &topiary_tree_sitter_facade::Language, - _root: &Node, - _source: &[u8], ) -> FormatterResult<()> { unimplemented!(); }