Skip to content

Commit

Permalink
WIP: query coverage check
Browse files Browse the repository at this point in the history
  • Loading branch information
nbacquey committed Oct 28, 2024
1 parent f68cfd4 commit 74717cc
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 18 deletions.
7 changes: 7 additions & 0 deletions topiary-cli/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,13 @@ pub enum Commands {
#[command(display_order = 4)]
Prefetch,

/// Checks how much of the tree-sitter query is used
#[command(display_order = 5)]
Coverage {
#[command(flatten)]
input: ExactlyOneInput,
},

/// Generate shell completion script
#[command(display_order = 100)]
Completion {
Expand Down
26 changes: 25 additions & 1 deletion topiary-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ mod visualisation;

use std::{
error::Error,
io::{BufReader, BufWriter},
io::{BufReader, BufWriter, empty},
process::ExitCode,
};

Expand Down Expand Up @@ -147,6 +147,30 @@ async fn run() -> CLIResult<()> {
config.prefetch_languages()?;
}

Commands::Coverage { input } => {
// We are guaranteed (by clap) to have exactly one input, so it's safe to unwrap
let input = Inputs::new(&config, &input).next().unwrap()?;

// We don't need a `LanguageDefinitionCache` when there's only one input,
// which saves us the thread-safety overhead
let language = input.to_language().await?;

log::info!(
"Checking query coverage of {}, as {}",
input.source(),
input.language().name,
);

let mut buf_input = BufReader::new(input);

formatter(
&mut buf_input,
&mut empty(),
&language,
Operation::Coverage,
)?;
}

Commands::Completion { shell } => {
// The CLI parser fails if no shell is provided/detected, so it's safe to unwrap here
cli::completion(shell.unwrap());
Expand Down
11 changes: 10 additions & 1 deletion topiary-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ pub enum Operation {
/// Choose the type of visualisation Topiary should output
output_format: Visualisation,
},
/// Checks the coverage of the query file by the input
Coverage,
}

/// The function that takes an input and formats, or visualises an output.
Expand Down Expand Up @@ -240,7 +242,6 @@ pub fn formatter(
&language.query,
&language.grammar,
tolerate_parsing_errors,
false,
)?;

// Various post-processing of whitespace
Expand Down Expand Up @@ -271,6 +272,14 @@ pub fn formatter(
Visualisation::Json => serde_json::to_writer(output, &root)?,
};
}

Operation::Coverage => {
let () = tree_sitter::check_query_coverage(
&content,
&language.query,
&language.grammar,
)?;
}
};

Ok(())
Expand Down
30 changes: 14 additions & 16 deletions topiary-core/src/tree_sitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,8 @@ pub fn apply_query(
query: &TopiaryQuery,
grammar: &topiary_tree_sitter_facade::Language,
tolerate_parsing_errors: bool,
should_check_input_exhaustivity: bool,
) -> FormatterResult<AtomCollection> {
let (tree, grammar) = parse(input_content, grammar, tolerate_parsing_errors)?;
let (tree, _grammar) = parse(input_content, grammar, tolerate_parsing_errors)?;
let root = tree.root_node();
let source = input_content.as_bytes();

Expand All @@ -222,11 +221,6 @@ pub fn apply_query(
});
}

if should_check_input_exhaustivity {
let ref_match_count = matches.len();
check_input_exhaustivity(ref_match_count, query, grammar, &root, source)?;
}

// Find the ids of all tree-sitter nodes that were identified as a leaf
// We want to avoid recursing into them in the collect_leafs function.
let specified_leaf_nodes: HashSet<usize> = collect_leaf_ids(&matches, capture_names.clone());
Expand Down Expand Up @@ -500,15 +494,21 @@ fn check_predicates(predicates: &QueryPredicates) -> FormatterResult<()> {
/// Check if the input tests all patterns in the query, by disabling each
/// pattern in turn. If disabling a pattern does not decrease the number of
/// matches, then that pattern originally matched nothing in the input.
fn check_input_exhaustivity(
ref_match_count: usize,
pub fn check_query_coverage(
input_content: &str,
original_query: &TopiaryQuery,
grammar: &topiary_tree_sitter_facade::Language,
root: &Node,
source: &[u8],
) -> FormatterResult<()> {
let (tree, grammar) = parse(input_content, grammar, false)?;
let root = tree.root_node();
let source = input_content.as_bytes();

// Match queries
let mut cursor = QueryCursor::new();
let ref_match_count = original_query.query.matches(&root, source, &mut cursor).count();
let pattern_count = original_query.query.pattern_count();
let query_content = &original_query.query_content;

// This particular test avoids a SIGSEGV error that occurs when trying
// to count the matches of an empty query (see #481)
if pattern_count == 1 {
Expand All @@ -526,7 +526,7 @@ fn check_input_exhaustivity(
.map_err(|e| FormatterError::Query("Error parsing query file".into(), Some(e)))?;
query.disable_pattern(i);
let mut cursor = QueryCursor::new();
let match_count = query.matches(root, source, &mut cursor).count();
let match_count = query.matches(&root, source, &mut cursor).count();
if match_count == ref_match_count {
let index_start = query.start_byte_for_pattern(i);
let index_end = if i == pattern_count - 1 {
Expand All @@ -542,12 +542,10 @@ fn check_input_exhaustivity(
}

#[cfg(target_arch = "wasm32")]
fn check_input_exhaustivity(
_ref_match_count: usize,
fn check_query_coverage(
_input_content: &str,
_original_query: &TopiaryQuery,
_grammar: &topiary_tree_sitter_facade::Language,
_root: &Node,
_source: &[u8],
) -> FormatterResult<()> {
unimplemented!();
}

0 comments on commit 74717cc

Please sign in to comment.