Skip to content

Commit

Permalink
Prepare to write function tests
Browse files Browse the repository at this point in the history
- Keep track of which versions of functions we call.
- Add comments for pending tests
- Add a TODO list of functions to implement
  • Loading branch information
emk committed Oct 13, 2023
1 parent 2fe40d9 commit 7407721
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 25 deletions.
48 changes: 40 additions & 8 deletions src/analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,16 @@ use std::collections::HashMap;

use derive_visitor::{Drive, Visitor};

use crate::ast::{FunctionName, SpecialDateFunctionCall, SqlProgram};
use crate::ast::{FunctionCall, SpecialDateFunctionCall, SqlProgram};

/// A `phf` set of functions that are known to take any number of arguments.
static KNOWN_VARARG_FUNCTIONS: phf::Set<&'static str> = phf::phf_set! {
"COALESCE", "CONCAT",
};

/// Count all the function calls in a [`SqlProgram`].
#[derive(Debug, Default, Visitor)]
#[visitor(FunctionName(enter), SpecialDateFunctionCall(enter))]
#[visitor(FunctionCall(enter), SpecialDateFunctionCall(enter))]
pub struct FunctionCallCounts {
counts: HashMap<String, usize>,
}
Expand All @@ -20,24 +25,51 @@ impl FunctionCallCounts {
sql_program.drive(self)
}

fn record_call(&mut self, name: &str) {
let count = self.counts.entry(name.to_ascii_uppercase()).or_default();
fn record_call(&mut self, name: String) {
let count = self.counts.entry(name).or_default();
*count += 1;
}

fn enter_function_name(&mut self, function_name: &FunctionName) {
self.record_call(&function_name.unescaped_bigquery());
fn enter_function_call(&mut self, function_call: &FunctionCall) {
let base_name = function_call.name.unescaped_bigquery().to_ascii_uppercase();
let mut name = format!("{}(", base_name);
if KNOWN_VARARG_FUNCTIONS.contains(base_name.as_str()) {
name.push('*');
} else {
// Push '_' separated by ','.
for i in 0..function_call.args.nodes.len() {
if i > 0 {
name.push(',');
}
name.push('_');
}
}
name.push(')');
if function_call.over_clause.is_some() {
name.push_str(" OVER(..)");
}
self.record_call(name);
}

fn enter_special_date_function_call(
&mut self,
special_date_function_call: &SpecialDateFunctionCall,
) {
self.record_call(
let mut name = format!(
"{}(",
special_date_function_call
.function_name
.unescaped_bigquery(),
.unescaped_bigquery()
.to_ascii_uppercase(),
);
for i in 0..special_date_function_call.args.nodes.len() {
if i > 0 {
name.push(',');
}
name.push('_');
}
name.push_str(") (special)");
self.record_call(name);
}

/// Get a list of functions and how often they were called, sorted by
Expand Down
33 changes: 17 additions & 16 deletions src/cmd/sql_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ pub async fn cmd_sql_test(opt: &SqlTestOpt) -> Result<()> {
// Keep track of our test results.
let mut test_count = 0usize;
let mut test_failures: Vec<(PathBuf, Error)> = vec![];
let mut pending_paths: Vec<PathBuf> = vec![];
let mut pending: Vec<(PathBuf, String)> = vec![];

// Build a glob matching our test files, for use with `glob`.
let dir_path_str = opt.dir_path.as_os_str().to_str().ok_or_else(|| {
Expand Down Expand Up @@ -72,22 +72,23 @@ pub async fn cmd_sql_test(opt: &SqlTestOpt) -> Result<()> {

// Skip pending tests unless asked to run them.
if !opt.pending {
// Look for lines of the form `-- pending: db1, db2, ...`.
static PENDING_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?m)^--\s*pending:\s*([a-zA-Z0-9_][a-zA-Z0-9_, ]*)").unwrap()
});
// Look for lines of the form `-- pending: db1 Comment`.
static PENDING_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?m)^--\s*pending:\s*([a-zA-Z0-9_]+)(\s+.*)?").unwrap());
let target_string = driver.target().to_string();
if let Some(caps) = PENDING_RE.captures(&query) {
let dbs = caps.get(1).unwrap().as_str();
if dbs.split(',').any(|db| db.trim() == target_string) {
let db = caps.get(1).unwrap().as_str();
let comment = caps.get(2).map_or("", |m| m.as_str().trim());
if db == target_string {
print!("P");
let _ = io::stdout().flush();

pending_paths.push(
pending.push((
path.strip_prefix(&base_dir)
.unwrap_or_else(|_| &path)
.to_owned(),
);
comment.to_owned(),
));

continue;
}
Expand All @@ -114,19 +115,19 @@ pub async fn cmd_sql_test(opt: &SqlTestOpt) -> Result<()> {
e.emit();
}

if !pending_paths.is_empty() {
if !pending.is_empty() {
println!("\nPending tests:");
for path in &pending_paths {
println!(" {}", path.display());
for (path, comment) in &pending {
println!(" {} ({})", path.display(), comment);
}
}

if test_count == 0 {
Err(Error::Other("No tests found".into()))
} else if test_failures.is_empty() {
print!("\nOK: {} tests passed", test_count);
if !pending_paths.is_empty() {
print!(", {} pending", pending_paths.len());
if !pending.is_empty() {
print!(", {} pending", pending.len());
}
println!();
Ok(())
Expand All @@ -136,8 +137,8 @@ pub async fn cmd_sql_test(opt: &SqlTestOpt) -> Result<()> {
test_failures.len(),
test_count,
);
if !pending_paths.is_empty() {
print!(", {} pending", pending_paths.len());
if !pending.is_empty() {
print!(", {} pending", pending.len());
}
println!();

Expand Down
2 changes: 1 addition & 1 deletion tests/sql/data_types/literal_scalars.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-- pending: snowflake
-- pending: snowflake Test harness Arrow library reads 1.5 as 15

CREATE OR REPLACE TABLE __result1 AS
SELECT
Expand Down
53 changes: 53 additions & 0 deletions tests/sql/functions/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Function tests

This directory contains tests for BigQuery SQL functions, to see if we can run
them on other platforms.

## Tests to implement

Here is a list of functions that are high priorities to implement. You can
generate your own version of this list by running `joinery parse
--count-function-calls queries.csv`.

- [ ] REGEXP_REPLACE(_,_,_)
- [ ] REGEXP_EXTRACT(_,_)
- [ ] COALESCE(*)
- [ ] LOWER(_)
- [ ] TO_HEX(_)
- [ ] SHA256(_)
- [ ] LENGTH(_)
- [ ] CONCAT(*)
- [ ] TRIM(_)
- [ ] ARRAY_TO_STRING(_,_)
- [ ] SUM(_)
- [ ] FARM_FINGERPRINT(_)
- [ ] ANY_VALUE(_)
- [ ] ROW_NUMBER() OVER(..)
- [ ] COUNTIF(_)
- [ ] UPPER(_)
- [ ] ARRAY_AGG(_)
- [ ] DATE_TRUNC(_,_) (special)
- [ ] MIN(_)
- [ ] FORMAT_DATETIME(_,_)
- [ ] RAND()
- [ ] RANK() OVER(..)
- [ ] ARRAY_LENGTH(_)
- [ ] SUM(_) OVER(..)
- [ ] DATETIME_SUB(_,_)
- [ ] DATE_DIFF(_,_,_) (special)
- [ ] CURRENT_DATETIME()
- [ ] DATE_SUB(_,_)
- [ ] EXP(_)
- [ ] MAX(_)
- [ ] GENERATE_UUID()
- [ ] DATE(_)
- [ ] LEAST(_,_)
- [ ] APPROX_QUANTILES(_,_)
- [ ] GENERATE_DATE_ARRAY(_,_,_)
- [ ] DATE_ADD(_,_)
- [ ] LAG(_) OVER(..)
- [ ] DATETIME_DIFF(_,_,_) (special)
- [ ] DATETIME_TRUNC(_,_) (special)
- [ ] FIRST_VALUE(_) OVER(..)
- [ ] DATETIME(_)
- [ ] LEAST(_)
20 changes: 20 additions & 0 deletions tests/sql/functions/simple/regexp.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- pending: snowflake REGEXP_EXTRACT needs to be wrapped with REGEXP_SUBSTR
-- pending: sqlite3 No regex functions
--
-- REGEXP_REPLACE, REGEXP_EXTRACT
--
-- https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions
--
-- We should consider testing a larger set of regular expression features,
-- because different databases may support different regex syntax.

CREATE OR REPLACE TABLE __result1 AS
SELECT
REGEXP_REPLACE('foo', r'oo', 'ee') AS replaced,
REGEXP_EXTRACT('foobar', r'o+') AS extracted;

CREATE OR REPLACE TABLE __expected1 (
replaced STRING,
extracted STRING,
);
INSERT INTO __expected1 VALUES ('fee', 'oo');

0 comments on commit 7407721

Please sign in to comment.