Skip to content

Commit

Permalink
Merge pull request #523 from tweag/erin/reuse-query-idempotence
Browse files Browse the repository at this point in the history
Create the query only once
  • Loading branch information
Erin van der Veen authored Jun 20, 2023
2 parents f236f8b + d662f65 commit 738a178
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 40 deletions.
5 changes: 3 additions & 2 deletions topiary-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::{
output::OutputFile,
visualise::Visualisation,
};
use topiary::{formatter, Language, Operation, SupportedLanguage};
use topiary::{formatter, Language, Operation, SupportedLanguage, TopiaryQuery};

#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
Expand Down Expand Up @@ -130,7 +130,7 @@ async fn run() -> CLIResult<()> {
language.query_file()?
};

let query = (|| {
let query_content = (|| {
let mut reader = BufReader::new(File::open(&query_path)?);
let mut contents = String::new();
reader.read_to_string(&mut contents)?;
Expand All @@ -145,6 +145,7 @@ async fn run() -> CLIResult<()> {
})?;

let grammar = language.grammar().await?;
let query = TopiaryQuery::new(&grammar, &query_content)?;

let operation = if let Some(visualisation) = args.visualise {
Operation::Visualise {
Expand Down
7 changes: 4 additions & 3 deletions topiary-playground/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#[cfg(target_arch = "wasm32")]
use topiary::{formatter, Configuration, FormatterResult, Operation};
use topiary::{formatter, Configuration, FormatterResult, Operation, TopiaryQuery};
#[cfg(target_arch = "wasm32")]
use tree_sitter_facade::TreeSitter;
#[cfg(target_arch = "wasm32")]
Expand Down Expand Up @@ -41,7 +41,7 @@ pub async fn format(
#[cfg(target_arch = "wasm32")]
async fn format_inner(
input: &str,
query: &str,
query_content: &str,
language_name: &str,
check_idempotence: bool,
tolerate_parsing_errors: bool,
Expand All @@ -51,11 +51,12 @@ async fn format_inner(
let configuration = Configuration::parse_default_configuration()?;
let language = configuration.get_language(language_name)?;
let grammar = language.grammar_wasm().await?;
let query = TopiaryQuery::new(&grammar, query_content)?;

formatter(
&mut input.as_bytes(),
&mut output,
query,
&query,
language,
&grammar,
Operation::Format {
Expand Down
5 changes: 3 additions & 2 deletions topiary/benches/benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@ use criterion::async_executor::FuturesExecutor;
use criterion::{criterion_group, criterion_main, Criterion};
use std::fs;
use std::io;
use topiary::Configuration;
use topiary::{formatter, Operation};
use topiary::{Configuration, TopiaryQuery};

async fn format() {
let input = fs::read_to_string("tests/samples/input/ocaml.ml").unwrap();
let query = fs::read_to_string("../languages/ocaml.scm").unwrap();
let query_content = fs::read_to_string("../languages/ocaml.scm").unwrap();
let configuration = Configuration::parse_default_configuration().unwrap();
let language = configuration.get_language("ocaml").unwrap();
let grammar = language.grammar().await.unwrap();
let query = TopiaryQuery::new(&grammar, &query_content).unwrap();

let mut input = input.as_bytes();
let mut output = io::BufWriter::new(Vec::new());
Expand Down
24 changes: 14 additions & 10 deletions topiary/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ pub use crate::{
configuration::{default_configuration_toml, Configuration},
error::{FormatterError, IoError},
language::{Language, SupportedLanguage},
tree_sitter::{apply_query, SyntaxNode, Visualisation},
tree_sitter::{apply_query, SyntaxNode, TopiaryQuery, Visualisation},
};

mod atom_collection;
Expand Down Expand Up @@ -145,21 +145,22 @@ pub enum Operation {
/// # tokio_test::block_on(async {
/// use std::fs::File;
/// use std::io::{BufReader, Read};
/// use topiary::{formatter, Configuration, FormatterError, Operation};
/// use topiary::{formatter, Configuration, FormatterError, TopiaryQuery, Operation};
///
/// let input = "[1,2]".to_string();
/// let mut input = input.as_bytes();
/// let mut output = Vec::new();
/// let mut query_file = BufReader::new(File::open("../languages/json.scm").expect("query file"));
/// let mut query = String::new();
/// query_file.read_to_string(&mut query).expect("read query file");
/// let mut query_content = String::new();
/// query_file.read_to_string(&mut query_content).expect("read query file");
///
/// let config = Configuration::parse_default_configuration().unwrap();
/// let language = config.get_language("json").unwrap();
/// let grammar = language
/// .grammar()
/// .await
/// .expect("grammar");
/// let query = TopiaryQuery::new(&grammar, &query_content).unwrap();
///
/// match formatter(&mut input, &mut output, &query, &language, &grammar, Operation::Format{ skip_idempotence: false, tolerate_parsing_errors: false }) {
/// Ok(()) => {
Expand All @@ -177,7 +178,7 @@ pub enum Operation {
pub fn formatter(
input: &mut impl io::Read,
output: &mut impl io::Write,
query: &str,
query: &TopiaryQuery,
language: &Language,
grammar: &tree_sitter_facade::Language,
operation: Operation,
Expand All @@ -196,6 +197,7 @@ pub fn formatter(
} => {
// All the work related to tree-sitter and the query is done here
log::info!("Apply Tree-sitter query");

let mut atoms =
tree_sitter::apply_query(&content, query, grammar, tolerate_parsing_errors, false)?;

Expand Down Expand Up @@ -257,7 +259,7 @@ fn trim_whitespace(s: &str) -> String {
/// `Err(FormatterError::Formatting(...))` if the formatting failed
fn idempotence_check(
content: &str,
query: &str,
query: &TopiaryQuery,
language: &Language,
grammar: &tree_sitter_facade::Language,
tolerate_parsing_errors: bool,
Expand Down Expand Up @@ -310,23 +312,24 @@ mod tests {

use crate::{
configuration::Configuration, error::FormatterError, formatter,
test_utils::pretty_assert_eq, Operation,
test_utils::pretty_assert_eq, Operation, TopiaryQuery,
};

/// Attempt to parse invalid json, expecting a failure
#[test(tokio::test)]
async fn parsing_error_fails_formatting() {
let mut input = r#"{"foo":{"bar"}}"#.as_bytes();
let mut output = Vec::new();
let query = "(#language! json)";
let query_content = "(#language! json)";
let configuration = Configuration::parse_default_configuration().unwrap();
let language = configuration.get_language("json").unwrap();
let grammar = language.grammar().await.unwrap();
let query = TopiaryQuery::new(&grammar, query_content).unwrap();

match formatter(
&mut input,
&mut output,
query,
&query,
language,
&grammar,
Operation::Format {
Expand All @@ -352,10 +355,11 @@ mod tests {
let expected = "{ \"one\": {\"bar\" \"baz\"}, \"two\": \"bar\" }\n";

let mut output = Vec::new();
let query = fs::read_to_string("../languages/json.scm").unwrap();
let query_content = fs::read_to_string("../languages/json.scm").unwrap();
let configuration = Configuration::parse_default_configuration().unwrap();
let language = configuration.get_language("json").unwrap();
let grammar = language.grammar().await.unwrap();
let query = TopiaryQuery::new(&grammar, &query_content).unwrap();

formatter(
&mut input,
Expand Down
60 changes: 41 additions & 19 deletions topiary/src/tree_sitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,35 @@ struct Position {
column: u32,
}

/// Topiary often needs both the tree-sitter `Query` and the original content
/// beloging to the file from which the query was parsed. This struct is a simple
/// convenience wrapper that combines the `Query` with its original string.
pub struct TopiaryQuery {
pub query: Query,
pub query_content: String,
}

impl TopiaryQuery {
/// Creates a new `TopiaryQuery` from a tree-sitter language/grammar and the
/// contents of the query file.
///
/// # Errors
///
/// This function will return an error if tree-sitter failed to parse the query file.
pub fn new(
grammar: &tree_sitter_facade::Language,
query_content: &str,
) -> FormatterResult<TopiaryQuery> {
let query = Query::new(grammar, query_content)
.map_err(|e| FormatterError::Query("Error parsing query file".into(), Some(e)))?;

Ok(TopiaryQuery {
query,
query_content: query_content.to_owned(),
})
}
}

impl From<Point> for Position {
fn from(point: Point) -> Self {
Self {
Expand Down Expand Up @@ -92,23 +121,21 @@ struct LocalQueryMatch<'a> {
/// - A unknown capture name was encountered in the query.
pub fn apply_query(
input_content: &str,
query_content: &str,
query: &TopiaryQuery,
grammar: &tree_sitter_facade::Language,
tolerate_parsing_errors: bool,
should_check_input_exhaustivity: bool,
) -> FormatterResult<AtomCollection> {
let (tree, grammar) = parse(input_content, grammar, tolerate_parsing_errors)?;
let root = tree.root_node();
let source = input_content.as_bytes();
let query = Query::new(grammar, query_content)
.map_err(|e| FormatterError::Query("Error parsing query file".into(), Some(e)))?;

// Match queries
let mut cursor = QueryCursor::new();
let mut matches: Vec<LocalQueryMatch> = Vec::new();
let capture_names = query.capture_names();
let capture_names = query.query.capture_names();

for query_match in query.matches(&root, source, &mut cursor) {
for query_match in query.query.matches(&root, source, &mut cursor) {
let local_captures: Vec<QueryCapture> = query_match.captures().collect();

matches.push(LocalQueryMatch {
Expand All @@ -119,14 +146,7 @@ pub fn apply_query(

if should_check_input_exhaustivity {
let ref_match_count = matches.len();
check_input_exhaustivity(
ref_match_count,
&query,
query_content,
grammar,
&root,
source,
)?;
check_input_exhaustivity(ref_match_count, query, grammar, &root, source)?;
}

// Find the ids of all tree-sitter nodes that were identified as a leaf
Expand All @@ -152,7 +172,7 @@ pub fn apply_query(

let mut predicates = QueryPredicates::default();

for p in query.general_predicates(m.pattern_index) {
for p in query.query.general_predicates(m.pattern_index) {
predicates = handle_predicate(&p, &predicates)?;
}
check_predicates(&predicates)?;
Expand Down Expand Up @@ -362,13 +382,13 @@ fn check_predicates(predicates: &QueryPredicates) -> FormatterResult<()> {
/// then that pattern originally matched nothing in the input.
fn check_input_exhaustivity(
ref_match_count: usize,
original_query: &Query,
query_content: &str,
original_query: &TopiaryQuery,
grammar: &tree_sitter_facade::Language,
root: &Node,
source: &[u8],
) -> FormatterResult<()> {
let pattern_count = original_query.pattern_count();
let pattern_count = original_query.query.pattern_count();
let query_content = &original_query.query_content;
// This particular test avoids a SIGSEGV error that occurs when trying
// to count the matches of an empty query (see #481)
if pattern_count == 1 {
Expand All @@ -379,6 +399,9 @@ fn check_input_exhaustivity(
}
}
for i in 0..pattern_count {
// We don't need to use TopiaryQuery in this test since we have no need
// for duplicate versions of the query_content string, instead we create the query
// manually.
let mut query = Query::new(grammar, query_content)
.map_err(|e| FormatterError::Query("Error parsing query file".into(), Some(e)))?;
query.disable_pattern(i);
Expand All @@ -401,8 +424,7 @@ fn check_input_exhaustivity(
#[cfg(target_arch = "wasm32")]
fn check_input_exhaustivity(
_ref_match_count: usize,
_original_query: &Query,
_query_content: &str,
_original_query: &TopiaryQuery,
_grammar: &tree_sitter_facade::Language,
_root: &Node,
_source: &[u8],
Expand Down
14 changes: 10 additions & 4 deletions topiary/tests/sample-tester.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use test_log::test;

use topiary::{
apply_query, formatter, test_utils::pretty_assert_eq, Configuration, FormatterError, Language,
Operation,
Operation, TopiaryQuery,
};

#[test(tokio::test)]
Expand All @@ -31,10 +31,12 @@ async fn input_output_tester() {

let mut input = BufReader::new(fs::File::open(file.path()).unwrap());
let mut output = Vec::new();
let query = fs::read_to_string(language.query_file().unwrap()).unwrap();
let query_content = fs::read_to_string(language.query_file().unwrap()).unwrap();

let grammar = language.grammar().await.unwrap();

let query = TopiaryQuery::new(&grammar, &query_content).unwrap();

info!(
"Formatting file {} as {}.",
file.path().display(),
Expand Down Expand Up @@ -78,10 +80,12 @@ async fn formatted_query_tester() {

let mut input = BufReader::new(fs::File::open(file.path()).unwrap());
let mut output = Vec::new();
let query = fs::read_to_string(language.query_file().unwrap()).unwrap();
let query_content = fs::read_to_string(language.query_file().unwrap()).unwrap();

let grammar = language.grammar().await.unwrap();

let query = TopiaryQuery::new(&grammar, &query_content).unwrap();

formatter(
&mut input,
&mut output,
Expand Down Expand Up @@ -122,7 +126,9 @@ async fn exhaustive_query_tester() {

let grammar = language.grammar().await.unwrap();

apply_query(&input_content, &query_content, &grammar, false, true).unwrap_or_else(|e| {
let query = TopiaryQuery::new(&grammar, &query_content).unwrap();

apply_query(&input_content, &query, &grammar, false, true).unwrap_or_else(|e| {
if let FormatterError::PatternDoesNotMatch(_) = e {
panic!("Found untested query in file {query_file:?}:\n{e}");
} else {
Expand Down

0 comments on commit 738a178

Please sign in to comment.